| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 |
| ; |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL |
| ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=all -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX |
| |
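; Checks the cost-model output for @llvm.masked.load intrinsics over f64/f32/i64/i32/i16/i8
; vectors at assorted fixed element counts (including non-power-of-two widths), for each of
; the target configurations in the RUN lines above.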
| define i32 @masked_load(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_load' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:31 CodeSize:38 Lat:38 SizeLat:38 for: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:33 Lat:33 SizeLat:33 for: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:23 CodeSize:28 Lat:28 SizeLat:28 for: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:16 Lat:16 SizeLat:16 for: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:75 CodeSize:91 Lat:91 SizeLat:91 for: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:71 CodeSize:86 Lat:86 SizeLat:86 for: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:67 CodeSize:81 Lat:81 SizeLat:81 for: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:63 CodeSize:76 Lat:76 SizeLat:76 for: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:59 CodeSize:71 Lat:71 SizeLat:71 for: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:66 Lat:66 SizeLat:66 for: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:61 Lat:61 SizeLat:61 for: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:47 CodeSize:56 Lat:56 SizeLat:56 for: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:40 Lat:40 SizeLat:40 for: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:35 Lat:35 SizeLat:35 for: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:25 CodeSize:30 Lat:30 SizeLat:30 for: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:17 Lat:17 SizeLat:17 for: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:43 CodeSize:51 Lat:51 SizeLat:51 for: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:38 CodeSize:45 Lat:45 SizeLat:45 for: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:39 Lat:39 SizeLat:39 for: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:33 Lat:33 SizeLat:33 for: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:25 Lat:25 SizeLat:25 for: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:19 Lat:19 SizeLat:19 for: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:91 CodeSize:107 Lat:107 SizeLat:107 for: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:86 CodeSize:101 Lat:101 SizeLat:101 for: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:81 CodeSize:95 Lat:95 SizeLat:95 for: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:76 CodeSize:89 Lat:89 SizeLat:89 for: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:71 CodeSize:83 Lat:83 SizeLat:83 for: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:66 CodeSize:77 Lat:77 SizeLat:77 for: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:71 Lat:71 SizeLat:71 for: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:65 Lat:65 SizeLat:65 for: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:45 CodeSize:53 Lat:53 SizeLat:53 for: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:47 Lat:47 SizeLat:47 for: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:41 Lat:41 SizeLat:41 for: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:35 Lat:35 SizeLat:35 for: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:20 Lat:20 SizeLat:20 for: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:158 CodeSize:190 Lat:190 SizeLat:190 for: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:79 CodeSize:95 Lat:95 SizeLat:95 for: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:376 CodeSize:440 Lat:440 SizeLat:440 for: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:188 CodeSize:220 Lat:220 SizeLat:220 for: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:94 CodeSize:110 Lat:110 SizeLat:110 for: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:46 CodeSize:54 Lat:54 SizeLat:54 for: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_load' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:32 Lat:32 SizeLat:32 for: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:28 SizeLat:28 for: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:19 CodeSize:24 Lat:24 SizeLat:24 for: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:14 SizeLat:14 for: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:60 CodeSize:76 Lat:76 SizeLat:76 for: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:71 Lat:71 SizeLat:71 for: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:52 CodeSize:66 Lat:66 SizeLat:66 for: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:61 Lat:61 SizeLat:61 for: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:57 Lat:57 SizeLat:57 for: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:41 CodeSize:52 Lat:52 SizeLat:52 for: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:37 CodeSize:47 Lat:47 SizeLat:47 for: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:33 CodeSize:42 Lat:42 SizeLat:42 for: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:30 CodeSize:38 Lat:38 SizeLat:38 for: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:26 CodeSize:33 Lat:33 SizeLat:33 for: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:28 SizeLat:28 for: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:23 Lat:23 SizeLat:23 for: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:14 SizeLat:14 for: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:35 Lat:35 SizeLat:35 for: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:30 SizeLat:30 for: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:25 SizeLat:25 for: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:15 SizeLat:15 for: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:60 CodeSize:75 Lat:75 SizeLat:75 for: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:70 Lat:70 SizeLat:70 for: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:52 CodeSize:65 Lat:65 SizeLat:65 for: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:60 Lat:60 SizeLat:60 for: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:55 Lat:55 SizeLat:55 for: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:50 Lat:50 SizeLat:50 for: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:45 Lat:45 SizeLat:45 for: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:35 Lat:35 SizeLat:35 for: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:30 SizeLat:30 for: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:25 SizeLat:25 for: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:15 SizeLat:15 for: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:160 SizeLat:160 for: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:320 SizeLat:320 for: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:160 SizeLat:160 for: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX-LABEL: 'masked_load' |
| ; AVX-NEXT: Cost Model: Found costs of 4 for: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 4 for: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 4 for: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 4 for: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 5 for: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:131 CodeSize:163 Lat:163 SizeLat:163 for: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:260 CodeSize:324 Lat:324 SizeLat:324 for: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; KNL-LABEL: 'masked_load' |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:132 CodeSize:164 Lat:164 SizeLat:164 for: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:262 CodeSize:326 Lat:326 SizeLat:326 for: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKX-LABEL: 'masked_load' |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x double> undef) |
| %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x double> undef) |
| %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x double> undef) |
| %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x double> undef) |
| %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| |
| %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x float> undef) |
| %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x float> undef) |
| %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x float> undef) |
| %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x float> undef) |
| %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x float> undef) |
| %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x float> undef) |
| %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x float> undef) |
| %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x float> undef) |
| %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x float> undef) |
| %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x float> undef) |
| %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x float> undef) |
| %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x float> undef) |
| |
| %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i64> undef) |
| %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i64> undef) |
| %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i64> undef) |
| %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i64> undef) |
| %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| |
| %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> %m15, <15 x i32> undef) |
| %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> %m14, <14 x i32> undef) |
| %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> %m13, <13 x i32> undef) |
| %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> %m12, <12 x i32> undef) |
| %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> %m11, <11 x i32> undef) |
| %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> %m10, <10 x i32> undef) |
| %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> %m9, <9 x i32> undef) |
| %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> %m7, <7 x i32> undef) |
| %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> %m6, <6 x i32> undef) |
| %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> %m5, <5 x i32> undef) |
| %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> %m3, <3 x i32> undef) |
| %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> %m1, <1 x i32> undef) |
| |
| %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| |
| %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| |
| ret i32 0 |
| } |
| |
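; Same coverage for @llvm.masked.store: the vector value operand is stored through a masked
; pointer and the reported costs are checked per target configuration.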
| define i32 @masked_store(<1 x i1> %m1, <2 x i1> %m2, <3 x i1> %m3, <4 x i1> %m4, <5 x i1> %m5, <6 x i1> %m6, <7 x i1> %m7, <8 x i1> %m8, <9 x i1> %m9, <10 x i1> %m10, <11 x i1> %m11, <12 x i1> %m12, <13 x i1> %m13, <14 x i1> %m14, <15 x i1> %m15, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_store' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:30 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:26 Lat:26 SizeLat:26 for: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:15 SizeLat:15 for: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:75 CodeSize:91 Lat:91 SizeLat:91 for: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:70 CodeSize:85 Lat:85 SizeLat:85 for: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:65 CodeSize:79 Lat:79 SizeLat:79 for: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:60 CodeSize:73 Lat:73 SizeLat:73 for: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:68 Lat:68 SizeLat:68 for: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:62 Lat:62 SizeLat:62 for: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:46 CodeSize:56 Lat:56 SizeLat:56 for: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:41 CodeSize:50 Lat:50 SizeLat:50 for: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:39 Lat:39 SizeLat:39 for: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:27 Lat:27 SizeLat:27 for: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:16 Lat:16 SizeLat:16 for: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:43 CodeSize:51 Lat:51 SizeLat:51 for: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:44 Lat:44 SizeLat:44 for: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:32 CodeSize:38 Lat:38 SizeLat:38 for: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:31 Lat:31 SizeLat:31 for: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:21 CodeSize:25 Lat:25 SizeLat:25 for: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:91 CodeSize:107 Lat:107 SizeLat:107 for: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:85 CodeSize:100 Lat:100 SizeLat:100 for: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:79 CodeSize:93 Lat:93 SizeLat:93 for: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:73 CodeSize:86 Lat:86 SizeLat:86 for: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:68 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:62 CodeSize:73 Lat:73 SizeLat:73 for: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:56 CodeSize:66 Lat:66 SizeLat:66 for: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:50 CodeSize:59 Lat:59 SizeLat:59 for: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:45 CodeSize:53 Lat:53 SizeLat:53 for: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:46 Lat:46 SizeLat:46 for: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:39 Lat:39 SizeLat:39 for: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:32 Lat:32 SizeLat:32 for: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:158 CodeSize:190 Lat:190 SizeLat:190 for: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:79 CodeSize:95 Lat:95 SizeLat:95 for: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:376 CodeSize:440 Lat:440 SizeLat:440 for: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:188 CodeSize:220 Lat:220 SizeLat:220 for: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:94 CodeSize:110 Lat:110 SizeLat:110 for: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:46 CodeSize:54 Lat:54 SizeLat:54 for: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_store' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:31 Lat:31 SizeLat:31 for: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:21 CodeSize:27 Lat:27 SizeLat:27 for: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:17 CodeSize:22 Lat:22 SizeLat:22 for: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:10 CodeSize:13 Lat:13 SizeLat:13 for: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:60 CodeSize:76 Lat:76 SizeLat:76 for: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:52 CodeSize:66 Lat:66 SizeLat:66 for: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:61 Lat:61 SizeLat:61 for: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:57 Lat:57 SizeLat:57 for: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:41 CodeSize:52 Lat:52 SizeLat:52 for: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:37 CodeSize:47 Lat:47 SizeLat:47 for: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:33 CodeSize:42 Lat:42 SizeLat:42 for: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:30 CodeSize:38 Lat:38 SizeLat:38 for: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:26 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:22 CodeSize:28 Lat:28 SizeLat:28 for: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:18 CodeSize:23 Lat:23 SizeLat:23 for: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:14 Lat:14 SizeLat:14 for: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:30 SizeLat:30 for: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:25 SizeLat:25 for: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:15 SizeLat:15 for: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:60 CodeSize:75 Lat:75 SizeLat:75 for: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:56 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:52 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:48 CodeSize:60 Lat:60 SizeLat:60 for: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:44 CodeSize:55 Lat:55 SizeLat:55 for: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:40 CodeSize:50 Lat:50 SizeLat:50 for: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:36 CodeSize:45 Lat:45 SizeLat:45 for: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:28 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:24 CodeSize:30 Lat:30 SizeLat:30 for: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:20 CodeSize:25 Lat:25 SizeLat:25 for: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:15 Lat:15 SizeLat:15 for: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:160 SizeLat:160 for: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:256 CodeSize:320 Lat:320 SizeLat:320 for: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:128 CodeSize:160 Lat:160 SizeLat:160 for: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX-LABEL: 'masked_store' |
| ; AVX-NEXT: Cost Model: Found costs of 16 for: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; AVX-NEXT: Cost Model: Found costs of 16 for: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; AVX-NEXT: Cost Model: Found costs of 16 for: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; AVX-NEXT: Cost Model: Found costs of 16 for: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; AVX-NEXT: Cost Model: Found costs of 17 for: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:131 CodeSize:163 Lat:163 SizeLat:163 for: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:260 CodeSize:324 Lat:324 SizeLat:324 for: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; KNL-LABEL: 'masked_store' |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; KNL-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:132 CodeSize:164 Lat:164 SizeLat:164 for: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:262 CodeSize:326 Lat:326 SizeLat:326 for: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:64 CodeSize:80 Lat:80 SizeLat:80 for: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:32 CodeSize:40 Lat:40 SizeLat:40 for: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKX-LABEL: 'masked_store' |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
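| ; Input IR: the masked.store intrinsics checked above, covering 1-8 element |
| ; double/i64 vectors, 1-16 element float/i32 vectors, and the 4/8/16/32-element |
| ; i16 and 8/16/32/64-element i8 cases. |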
| call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> %m7) |
| call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> %m6) |
| call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> %m5) |
| call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> %m3) |
| call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> %m15) |
| call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> %m14) |
| call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> %m13) |
| call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> %m12) |
| call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> %m11) |
| call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> %m10) |
| call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> %m9) |
| call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> %m7) |
| call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> %m6) |
| call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> %m5) |
| call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> %m3) |
| call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> %m7) |
| call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> %m6) |
| call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> %m5) |
| call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> %m3) |
| call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> %m15) |
| call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> %m14) |
| call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> %m13) |
| call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> %m12) |
| call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> %m11) |
| call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> %m10) |
| call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> %m9) |
| call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> %m7) |
| call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> %m6) |
| call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> %m5) |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> %m3) |
| call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> %m32) |
| call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> %m4) |
| |
| call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> %m64) |
| call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> %m32) |
| call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> %m8) |
| |
| ret i32 0 |
| } |
| |
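| ; Gathers are only exercised at power-of-two element counts (1, 2, 4, 8, 16, 32, 64). |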
| define i32 @masked_gather(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_gather' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:31 CodeSize:39 Lat:39 SizeLat:39 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:93 Lat:93 SizeLat:93 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:41 Lat:41 SizeLat:41 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:384 Lat:384 SizeLat:384 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:192 Lat:192 SizeLat:192 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_gather' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:21 CodeSize:29 Lat:29 SizeLat:29 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:15 SizeLat:15 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:23 CodeSize:31 Lat:31 SizeLat:31 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:16 SizeLat:16 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:196 CodeSize:260 Lat:260 SizeLat:260 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX1-LABEL: 'masked_gather' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:108 CodeSize:140 Lat:140 SizeLat:140 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:214 CodeSize:278 Lat:278 SizeLat:278 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX2-LABEL: 'masked_gather' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKL-LABEL: 'masked_gather' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:12 CodeSize:2 Lat:12 SizeLat:12 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:12 CodeSize:2 Lat:12 SizeLat:12 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:12 CodeSize:2 Lat:12 SizeLat:12 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:12 CodeSize:2 Lat:12 SizeLat:12 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; KNL-LABEL: 'masked_gather' |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKX-LABEL: 'masked_gather' |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x double> undef) |
| %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x double> undef) |
| %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x double> undef) |
| %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x double> undef) |
| |
| %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x float> undef) |
| %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x float> undef) |
| %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x float> undef) |
| %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x float> undef) |
| |
| %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i64> undef) |
| %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i64> undef) |
| %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i64> undef) |
| %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> %m1, <1 x i64> undef) |
| |
| %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i32> undef) |
| %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i32> undef) |
| %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i32> undef) |
| %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> %m2, <2 x i32> undef) |
| |
| %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i16> undef) |
| %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i16> undef) |
| %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i16> undef) |
| %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> %m4, <4 x i16> undef) |
| |
| %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> %m64, <64 x i8> undef) |
| %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> %m32, <32 x i8> undef) |
| %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> %m16, <16 x i8> undef) |
| %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> %m8, <8 x i8> undef) |
| |
| ret i32 0 |
| } |
| |
| define i32 @masked_scatter(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_scatter' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:31 CodeSize:39 Lat:39 SizeLat:39 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:93 Lat:93 SizeLat:93 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:41 Lat:41 SizeLat:41 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:384 Lat:384 SizeLat:384 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:192 Lat:192 SizeLat:192 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_scatter' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:21 CodeSize:29 Lat:29 SizeLat:29 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:15 SizeLat:15 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:23 CodeSize:31 Lat:31 SizeLat:31 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:16 SizeLat:16 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:196 CodeSize:260 Lat:260 SizeLat:260 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX1-LABEL: 'masked_scatter' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:108 CodeSize:140 Lat:140 SizeLat:140 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:214 CodeSize:278 Lat:278 SizeLat:278 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX2-LABEL: 'masked_scatter' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKL-LABEL: 'masked_scatter' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; KNL-LABEL: 'masked_scatter' |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKX-LABEL: 'masked_scatter' |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| |
| call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> %m1) |
| |
| call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> %m2) |
| |
| call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> %m4) |
| |
| call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> %m64) |
| call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> %m32) |
| call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> %m16) |
| call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> %m8) |
| |
| ret i32 0 |
| } |
| |
| define i32 @masked_expandload(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_expandload' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:31 CodeSize:39 Lat:39 SizeLat:39 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:93 Lat:93 SizeLat:93 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:41 Lat:41 SizeLat:41 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:384 Lat:384 SizeLat:384 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:192 Lat:192 SizeLat:192 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_expandload' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:21 CodeSize:29 Lat:29 SizeLat:29 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:15 SizeLat:15 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:23 CodeSize:31 Lat:31 SizeLat:31 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:16 SizeLat:16 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:196 CodeSize:260 Lat:260 SizeLat:260 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX1-LABEL: 'masked_expandload' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:108 CodeSize:140 Lat:140 SizeLat:140 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:214 CodeSize:278 Lat:278 SizeLat:278 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX2-LABEL: 'masked_expandload' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKL-LABEL: 'masked_expandload' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX512-LABEL: 'masked_expandload' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:34 CodeSize:42 Lat:42 SizeLat:42 for: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:69 CodeSize:85 Lat:85 SizeLat:85 for: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:34 CodeSize:42 Lat:42 SizeLat:42 for: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:46 for: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:73 CodeSize:89 Lat:89 SizeLat:89 for: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:36 CodeSize:44 Lat:44 SizeLat:44 for: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> %m8, <8 x double> undef) |
| %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> %m4, <4 x double> undef) |
| %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> %m2, <2 x double> undef) |
| %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> %m1, <1 x double> undef) |
| |
| %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> %m16, <16 x float> undef) |
| %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> %m8, <8 x float> undef) |
| %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> %m4, <4 x float> undef) |
| %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> %m2, <2 x float> undef) |
| |
| %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> %m8, <8 x i64> undef) |
| %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> %m4, <4 x i64> undef) |
| %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> %m2, <2 x i64> undef) |
| %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> %m1, <1 x i64> undef) |
| |
| %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> %m16, <16 x i32> undef) |
| %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> %m8, <8 x i32> undef) |
| %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> %m4, <4 x i32> undef) |
| %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> %m2, <2 x i32> undef) |
| |
| %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> %m32, <32 x i16> undef) |
| %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> %m16, <16 x i16> undef) |
| %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> %m8, <8 x i16> undef) |
| %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> %m4, <4 x i16> undef) |
| |
| %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> %m64, <64 x i8> undef) |
| %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> %m32, <32 x i8> undef) |
| %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> %m16, <16 x i8> undef) |
| %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> %m8, <8 x i8> undef) |
| |
| ret i32 0 |
| } |
| |
| define i32 @masked_compressstore(<1 x i1> %m1, <2 x i1> %m2, <4 x i1> %m4, <8 x i1> %m8, <16 x i1> %m16, <32 x i1> %m32, <64 x i1> %m64) { |
| ; SSE2-LABEL: 'masked_compressstore' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:31 CodeSize:39 Lat:39 SizeLat:39 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:19 CodeSize:23 Lat:23 SizeLat:23 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:93 Lat:93 SizeLat:93 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:130 CodeSize:162 Lat:162 SizeLat:162 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:65 CodeSize:81 Lat:81 SizeLat:81 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:33 CodeSize:41 Lat:41 SizeLat:41 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:320 CodeSize:384 Lat:384 SizeLat:384 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:160 CodeSize:192 Lat:192 SizeLat:192 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SSE42-LABEL: 'masked_compressstore' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:21 CodeSize:29 Lat:29 SizeLat:29 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:11 CodeSize:15 Lat:15 SizeLat:15 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:23 CodeSize:31 Lat:31 SizeLat:31 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:16 SizeLat:16 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:196 CodeSize:260 Lat:260 SizeLat:260 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:98 CodeSize:130 Lat:130 SizeLat:130 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX1-LABEL: 'masked_compressstore' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:108 CodeSize:140 Lat:140 SizeLat:140 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:214 CodeSize:278 Lat:278 SizeLat:278 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX2-LABEL: 'masked_compressstore' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; SKL-LABEL: 'masked_compressstore' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:26 CodeSize:34 Lat:34 SizeLat:34 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:29 CodeSize:37 Lat:37 SizeLat:37 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:15 CodeSize:19 Lat:19 SizeLat:19 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:107 CodeSize:139 Lat:139 SizeLat:139 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:54 CodeSize:70 Lat:70 SizeLat:70 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:212 CodeSize:276 Lat:276 SizeLat:276 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:106 CodeSize:138 Lat:138 SizeLat:138 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:53 CodeSize:69 Lat:69 SizeLat:69 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:27 CodeSize:35 Lat:35 SizeLat:35 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| ; AVX512-LABEL: 'masked_compressstore' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:34 CodeSize:42 Lat:42 SizeLat:42 for: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:69 CodeSize:85 Lat:85 SizeLat:85 for: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:34 CodeSize:42 Lat:42 SizeLat:42 for: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:38 CodeSize:46 Lat:46 SizeLat:46 for: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:22 Lat:22 SizeLat:22 for: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:2 CodeSize:3 Lat:3 SizeLat:3 for: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:73 CodeSize:89 Lat:89 SizeLat:89 for: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:36 CodeSize:44 Lat:44 SizeLat:44 for: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:143 CodeSize:175 Lat:175 SizeLat:175 for: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:71 CodeSize:87 Lat:87 SizeLat:87 for: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:283 CodeSize:347 Lat:347 SizeLat:347 for: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:141 CodeSize:173 Lat:173 SizeLat:173 for: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:70 CodeSize:86 Lat:86 SizeLat:86 for: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:35 CodeSize:43 Lat:43 SizeLat:43 for: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 0 |
| ; |
| call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> %m8) |
| call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> %m4) |
| call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> %m2) |
| call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> %m1) |
| |
| call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> %m16) |
| call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> %m8) |
| call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> %m4) |
| call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> %m2) |
| |
| call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> %m8) |
| call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> %m4) |
| call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> %m2) |
| call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> %m1) |
| |
| call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> %m16) |
| call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> %m8) |
| call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> %m4) |
| call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> %m2) |
| |
| call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> %m32) |
| call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> %m16) |
| call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> %m8) |
| call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> %m4) |
| |
| call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> %m64) |
| call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> %m32) |
| call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> %m16) |
| call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> %m8) |
| |
| ret i32 0 |
| } |
| |
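| ; Basic masked load/store costs where the mask comes from an integer compare against zero. |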
| define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) { |
| ; SSE2-LABEL: 'test1' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:5 CodeSize:5 Lat:4 SizeLat:5 for: %mask = icmp eq <2 x i64> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; SSE42-LABEL: 'test1' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %mask = icmp eq <2 x i64> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; AVX-LABEL: 'test1' |
| ; AVX-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:2 SizeLat:2 for: %mask = icmp eq <2 x i64> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; AVX512-LABEL: 'test1' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i64> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| %mask = icmp eq <2 x i64> %trigger, zeroinitializer |
| %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) |
| ret <2 x double> %res |
| } |
| |
| define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) { |
| ; SSE2-LABEL: 'test2' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SSE42-LABEL: 'test2' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX-LABEL: 'test2' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 2 for: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX512-LABEL: 'test2' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) |
| ret <4 x i32> %res |
| } |
| |
| define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { |
| ; SSE2-LABEL: 'test3' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:22 CodeSize:26 Lat:26 SizeLat:26 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test3' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX-LABEL: 'test3' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 8 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX512-LABEL: 'test3' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| %mask = icmp eq <4 x i32> %trigger, zeroinitializer |
| call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) |
| ret void |
| } |
| |
| define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) { |
| ; SSE2-LABEL: 'test4' |
| ; SSE2-NEXT: Cost Model: Found costs of 2 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:37 CodeSize:45 Lat:45 SizeLat:45 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| ; SSE42-LABEL: 'test4' |
| ; SSE42-NEXT: Cost Model: Found costs of 2 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:30 CodeSize:38 Lat:38 SizeLat:38 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| ; AVX1-LABEL: 'test4' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:5 Lat:2 SizeLat:6 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; AVX1-NEXT: Cost Model: Found costs of 2 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| ; AVX2-LABEL: 'test4' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:1 SizeLat:2 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; AVX2-NEXT: Cost Model: Found costs of 2 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| ; SKL-LABEL: 'test4' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:1 SizeLat:2 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; SKL-NEXT: Cost Model: Found costs of 2 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| ; AVX512-LABEL: 'test4' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x float> %res |
| ; |
| %mask = icmp eq <8 x i32> %trigger, zeroinitializer |
| %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) |
| ret <8 x float> %res |
| } |
| |
| define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { |
| ; SSE2-LABEL: 'test5' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test5' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX-LABEL: 'test5' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX512-LABEL: 'test5' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ret void |
| } |
| |
| define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { |
| ; SSE2-LABEL: 'test6' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test6' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX-LABEL: 'test6' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 9 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX512-LABEL: 'test6' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 2 for: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) |
| ret void |
| } |
| |
| define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { |
| ; SSE2-LABEL: 'test7' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %res |
| ; |
| ; SSE42-LABEL: 'test7' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %res |
| ; |
| ; AVX-LABEL: 'test7' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %res |
| ; |
| ; AVX512-LABEL: 'test7' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 2 for: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x float> %res |
| ; |
| %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) |
| ret <2 x float> %res |
| } |
| |
| define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { |
| ; SSE2-LABEL: 'test8' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %res |
| ; |
| ; SSE42-LABEL: 'test8' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %res |
| ; |
| ; AVX-LABEL: 'test8' |
| ; AVX-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX-NEXT: Cost Model: Found costs of 3 for: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %res |
| ; |
| ; AVX512-LABEL: 'test8' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 2 for: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %res |
| ; |
| %mask = icmp eq <2 x i32> %trigger, zeroinitializer |
| %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) |
| ret <2 x i32> %res |
| } |
| |
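| ; Masked gather costs with variable and all-true masks, for pointer vectors passed directly or computed from a base and sign-extended indices. |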
| define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { |
| ; SSE2-LABEL: 'test_gather_2f64' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:8 CodeSize:10 Lat:10 SizeLat:10 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_2f64' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_2f64' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_2f64' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:6 CodeSize:8 Lat:8 SizeLat:8 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; SKL-LABEL: 'test_gather_2f64' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:4 SizeLat:4 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| ; AVX512-LABEL: 'test_gather_2f64' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:7 CodeSize:9 Lat:9 SizeLat:9 for: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x double> %res |
| ; |
| %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) |
| ret <2 x double> %res |
| } |
| |
| define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { |
| ; SSE2-LABEL: 'test_gather_4i32' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_4i32' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_4i32' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_4i32' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SKL-LABEL: 'test_gather_4i32' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; KNL-LABEL: 'test_gather_4i32' |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SKX-LABEL: 'test_gather_4i32' |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) |
| ret <4 x i32> %res |
| } |
| |
| define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { |
| ; SSE2-LABEL: 'test_gather_4i32_const_mask' |
| ; SSE2-NEXT: Cost Model: Found costs of 19 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_4i32_const_mask' |
| ; SSE42-NEXT: Cost Model: Found costs of 12 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_4i32_const_mask' |
| ; AVX1-NEXT: Cost Model: Found costs of 13 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_4i32_const_mask' |
| ; AVX2-NEXT: Cost Model: Found costs of 13 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SKL-LABEL: 'test_gather_4i32_const_mask' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; KNL-LABEL: 'test_gather_4i32_const_mask' |
| ; KNL-NEXT: Cost Model: Found costs of 13 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| ; SKX-LABEL: 'test_gather_4i32_const_mask' |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> splat (i1 true), <4 x i32> %src0) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %res |
| ; |
| %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0) |
| ret <4 x i32> %res |
| } |
| |
| define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { |
| ; SSE2-LABEL: 'test_gather_16f32_const_mask' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of 60 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_16f32_const_mask' |
| ; SSE42-NEXT: Cost Model: Found costs of 8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of 44 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_16f32_const_mask' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_16f32_const_mask' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_16f32_const_mask' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX512-LABEL: 'test_gather_16f32_const_mask' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| |
| %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1> %mask) { |
| ; SSE2-LABEL: 'test_gather_16f32_var_mask' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_16f32_var_mask' |
| ; SSE42-NEXT: Cost Model: Found costs of 8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_16f32_var_mask' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_16f32_var_mask' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_16f32_var_mask' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX512-LABEL: 'test_gather_16f32_var_mask' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind |
| |
| %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1> %mask) { |
| ; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:61 CodeSize:77 Lat:77 SizeLat:77 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; SSE42-NEXT: Cost Model: Found costs of 8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:45 CodeSize:61 Lat:61 SizeLat:61 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:51 CodeSize:67 Lat:67 SizeLat:67 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:20 CodeSize:2 Lat:20 SizeLat:20 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind |
| |
| %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) |
| ret <16 x float> %res |
| } |
| |
| define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { |
| ; SSE2-LABEL: 'test_gather_16f32_const_mask2' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:8 SizeLat:8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of 60 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_16f32_const_mask2' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of 8 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of 44 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_16f32_const_mask2' |
| ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:14 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_16f32_const_mask2' |
| ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of 50 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_16f32_const_mask2' |
| ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SKL-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:24 SizeLat:24 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| ; AVX512-LABEL: 'test_gather_16f32_const_mask2' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> splat (i1 true), <16 x float> undef) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x float> %res |
| ; |
| %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| |
| %sext_ind = sext <16 x i32> %ind to <16 x i64> |
| %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind |
| |
| %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef) |
| ret <16 x float> %res |
| } |
| |
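| ; Masked scatter costs for v16i32, v8i32 and v4i32 stores. |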
| define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32> %val) { |
| ; SSE2-LABEL: 'test_scatter_16i32' |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; SSE2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:77 CodeSize:93 Lat:93 SizeLat:93 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test_scatter_16i32' |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; SSE42-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:49 CodeSize:65 Lat:65 SizeLat:65 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX1-LABEL: 'test_scatter_16i32' |
| ; AVX1-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:2 CodeSize:2 Lat:3 SizeLat:3 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; AVX1-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX2-LABEL: 'test_scatter_16i32' |
| ; AVX2-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; AVX2-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SKL-LABEL: 'test_scatter_16i32' |
| ; SKL-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; SKL-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:2 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; SKL-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; SKL-NEXT: Cost Model: Found costs of RThru:55 CodeSize:71 Lat:71 SizeLat:71 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX512-LABEL: 'test_scatter_16i32' |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| ; AVX512-NEXT: Cost Model: Found costs of 0 for: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| ; AVX512-NEXT: Cost Model: Found costs of 1 for: %imask = bitcast i16 %mask to <16 x i1> |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 |
| %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer |
| |
| %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind |
| %imask = bitcast i16 %mask to <16 x i1> |
| call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) |
| ret void |
| } |
| |
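| ; Masked scatter of <8 x i32> through a vector of arbitrary pointers with a variable mask. |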
| define void @test_scatter_8i32(<8 x i32> %a1, <8 x ptr> %ptr, <8 x i1> %mask) { |
| ; SSE2-LABEL: 'test_scatter_8i32' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:39 CodeSize:47 Lat:47 SizeLat:47 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test_scatter_8i32' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:25 CodeSize:33 Lat:33 SizeLat:33 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX-LABEL: 'test_scatter_8i32' |
| ; AVX-NEXT: Cost Model: Found costs of RThru:28 CodeSize:36 Lat:36 SizeLat:36 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX512-LABEL: 'test_scatter_8i32' |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:10 SizeLat:10 for: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) |
| ; AVX512-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) |
| ret void |
| } |
| |
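| ; Masked scatter of <4 x i32>; KNL and SKX are checked separately because 128-bit scatter instructions require AVX512VL, which KNL lacks. |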
| define void @test_scatter_4i32(<4 x i32> %a1, <4 x ptr> %ptr, <4 x i1> %mask) { |
| ; SSE2-LABEL: 'test_scatter_4i32' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SSE42-LABEL: 'test_scatter_4i32' |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; AVX-LABEL: 'test_scatter_4i32' |
| ; AVX-NEXT: Cost Model: Found costs of RThru:14 CodeSize:18 Lat:18 SizeLat:18 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ; AVX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; KNL-LABEL: 'test_scatter_4i32' |
| ; KNL-NEXT: Cost Model: Found costs of RThru:17 CodeSize:21 Lat:21 SizeLat:21 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| ; SKX-LABEL: 'test_scatter_4i32' |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void |
| ; |
| call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) |
| ret void |
| } |
| |
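| ; Masked gather of <4 x float> from a scalar base plus sign-extended 32-bit indices, with a variable mask. |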
| define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1> %mask) { |
| ; SSE2-LABEL: 'test_gather_4f32' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_4f32' |
| ; SSE42-NEXT: Cost Model: Found costs of 2 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:12 CodeSize:16 Lat:16 SizeLat:16 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_4f32' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_4f32' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:13 CodeSize:17 Lat:17 SizeLat:17 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_4f32' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; KNL-LABEL: 'test_gather_4f32' |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; KNL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; KNL-NEXT: Cost Model: Found costs of RThru:16 CodeSize:20 Lat:20 SizeLat:20 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SKX-LABEL: 'test_gather_4f32' |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SKX-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| |
| %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) |
| ret <4 x float> %res |
| } |
| |
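| ; Same gather as above but with an all-true mask, so the scalarized lowerings need no per-lane mask tests and their costs drop slightly. |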
| define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { |
| ; SSE2-LABEL: 'test_gather_4f32_const_mask' |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:2 SizeLat:2 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SSE2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SSE2-NEXT: Cost Model: Found costs of 15 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; SSE2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SSE42-LABEL: 'test_gather_4f32_const_mask' |
| ; SSE42-NEXT: Cost Model: Found costs of 2 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SSE42-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SSE42-NEXT: Cost Model: Found costs of 11 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; SSE42-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; AVX1-LABEL: 'test_gather_4f32_const_mask' |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; AVX1-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; AVX1-NEXT: Cost Model: Found costs of 12 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; AVX1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; AVX2-LABEL: 'test_gather_4f32_const_mask' |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; AVX2-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; AVX2-NEXT: Cost Model: Found costs of 12 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; AVX2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SKL-LABEL: 'test_gather_4f32_const_mask' |
| ; SKL-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SKL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SKL-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; SKL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; KNL-LABEL: 'test_gather_4f32_const_mask' |
| ; KNL-NEXT: Cost Model: Found costs of 1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; KNL-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; KNL-NEXT: Cost Model: Found costs of 12 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; KNL-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| ; SKX-LABEL: 'test_gather_4f32_const_mask' |
| ; SKX-NEXT: Cost Model: Found costs of 1 for: %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| ; SKX-NEXT: Cost Model: Found costs of 0 for: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| ; SKX-NEXT: Cost Model: Found costs of RThru:6 CodeSize:1 Lat:6 SizeLat:6 for: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> splat (i1 true), <4 x float> undef) |
| ; SKX-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x float> %res |
| ; |
| %sext_ind = sext <4 x i32> %ind to <4 x i64> |
| %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind |
| |
| %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) |
| ret <4 x float> %res |
| } |
| |
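| ; Intrinsic declarations for the masked load/store, gather/scatter, expandload, and compressstore operations used by the tests in this file. |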
| declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) |
| declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>) |
| declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>) |
| declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>) |
| declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>) |
| declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>) |
| declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>) |
| declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>) |
| |
| declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) |
| declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>) |
| declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>) |
| declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>) |
| declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) |
| declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) |
| declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) |
| declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) |
| declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) |
| declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) |
| declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) |
| declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) |
| declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) |
| declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) |
| declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) |
| declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) |
| |
| declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) |
| declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) |
| declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) |
| declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) |
| declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) |
| declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) |
| declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) |
| declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) |
| |
| declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) |
| declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) |
| declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) |
| declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) |
| declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) |
| declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) |
| declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) |
| declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) |
| declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) |
| declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) |
| declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) |
| declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) |
| declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) |
| declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) |
| declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) |
| declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) |
| |
| declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) |
| declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) |
| declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) |
| declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) |
| |
| declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>) |
| declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) |
| declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) |
| declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) |
| |
| declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) |
| declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) |
| declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) |
| declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) |
| declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) |
| |
| declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) |
| declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) |
| declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) |
| declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) |
| declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) |
| declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) |
| declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) |
| declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) |
| declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) |
| declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) |
| declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) |
| declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) |
| |
| declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) |
| declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) |
| declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) |
| declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) |
| declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) |
| |
| declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) |
| declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) |
| declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) |
| declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) |
| declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) |
| declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) |
| declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>) |
| declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) |
| declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) |
| declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>) |
| declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) |
| declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) |
| declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) |
| declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) |
| |
| declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) |
| declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) |
| declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) |
| |
| declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) |
| declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) |
| declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) |
| declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) |
| |
| declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) |
| declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) |
| declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) |
| declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) |
| |
| declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) |
| declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) |
| declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) |
| declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) |
| |
| declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) |
| declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) |
| declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) |
| declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) |
| |
| declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) |
| declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) |
| declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) |
| declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) |
| |
| declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) |
| declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) |
| declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) |
| declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) |
| |
| declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) |
| declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) |
| declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) |
| declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) |
| |
| declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) |
| declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) |
| |
| declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) |
| declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) |
| |
| declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) |
| declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) |
| |
| declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) |
| declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) |
| declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) |
| |
| declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) |
| declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) |
| declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) |
| declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) |
| |
| declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) |
| declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) |
| declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) |
| declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) |
| |
| declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) |
| declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) |
| declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) |
| declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) |
| |
| declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) |
| declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) |
| declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) |
| declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) |
| |
| declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) |
| declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) |
| declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) |
| declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) |
| |
| declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) |
| declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) |
| declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) |
| declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) |
| |
| declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) |
| declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) |
| declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) |
| declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) |
| |
| declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) |
| declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) |
| declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) |
| declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) |
| |
| declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) |
| declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) |
| declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) |
| declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) |
| |
| declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>) |
| declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>) |
| declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>) |
| declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>) |
| |
| declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>) |
| declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>) |
| declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>) |
| declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>) |
| |
| declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>) |
| declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) |
| declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>) |
| declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>) |
| |
| declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>) |
| declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) |
| declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) |
| declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>) |
| |
| declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>) |
| declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) |
| declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) |
| declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>) |