| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=infer-alignment,load-store-vectorizer -mcpu=sm_100 -mattr=+ptx88 -S -o - %s | FileCheck %s |
| |
| ; POSITIVE TESTS |
| |
| ; Stores i64 elements 0, 1, and 3; the gap at element 2 is filled, giving one masked <4 x i64> store with lane 2 masked off. |
| define void @singleGap(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @singleGap( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 1, i64 2, i64 poison, i64 4>, ptr addrspace(1) align 32 [[OUT]], <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i64 2, ptr addrspace(1) %getElem1, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i64 4, ptr addrspace(1) %getElem3, align 8 |
| ret void |
| } |
| |
| ; Stores double elements 0, 1, and 3; the gap at element 2 is filled, giving one masked <4 x double> store with lane 2 masked off. |
| define void @singleGapDouble(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @singleGapDouble( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v4f64.p1(<4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double 4.000000e+00>, ptr addrspace(1) align 32 [[OUT]], <4 x i1> <i1 true, i1 true, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store double 1.0, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store double 2.0, ptr addrspace(1) %getElem1, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store double 4.0, ptr addrspace(1) %getElem3, align 8 |
| ret void |
| } |
| |
| ; Stores i64 elements 0 and 3; the two-element gap (elements 1 and 2) is filled, giving one masked <4 x i64> store with lanes 1 and 2 masked off. |
| define void @multipleGaps(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @multipleGaps( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 1, i64 poison, i64 poison, i64 4>, ptr addrspace(1) align 32 [[OUT]], <4 x i1> <i1 true, i1 false, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(1) %out, align 32 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i64 4, ptr addrspace(1) %getElem3, align 8 |
| ret void |
| } |
| |
| ; Stores i32 elements 0, 3, 4, and 7; the gaps at elements 1, 2, 5, and 6 are filled, giving one masked <8 x i32> store with those four lanes masked off. |
| define void @multipleGaps8xi32(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @multipleGaps8xi32( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> <i32 1, i32 poison, i32 poison, i32 2, i32 4, i32 poison, i32 poison, i32 8>, ptr addrspace(1) align 32 [[OUT]], <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i32 1, ptr addrspace(1) %out, align 32 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i32 2, ptr addrspace(1) %getElem3, align 4 |
| %getElem4 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i32 4, ptr addrspace(1) %getElem4, align 4 |
| %getElem7 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 28 |
| store i32 8, ptr addrspace(1) %getElem7, align 4 |
| ret void |
| } |
| |
| ; Stores i64 elements 0, 1, 2, 3, 5, 6, and 7; the gap at element 4 is filled, resulting in a plain <4 x i64> store |
| ; followed by a masked <4 x i64> store whose first lane is the masked-off gap-fill element. |
| define void @singleGapLongerChain(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @singleGapLongerChain( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[GETELEM3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 24 |
| ; CHECK-NEXT: store <4 x i64> <i64 1, i64 2, i64 3, i64 4>, ptr addrspace(1) [[OUT]], align 32 |
| ; CHECK-NEXT: [[GAPFILLGEP:%.*]] = getelementptr i8, ptr addrspace(1) [[GETELEM3]], i64 8 |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 poison, i64 6, i64 7, i64 8>, ptr addrspace(1) align 32 [[GAPFILLGEP]], <4 x i1> <i1 false, i1 true, i1 true, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i64 2, ptr addrspace(1) %getElem1, align 8 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i64 3, ptr addrspace(1) %getElem2, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i64 4, ptr addrspace(1) %getElem3, align 8 |
| %getElem5 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 40 |
| store i64 6, ptr addrspace(1) %getElem5, align 8 |
| %getElem6 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 48 |
| store i64 7, ptr addrspace(1) %getElem6, align 8 |
| %getElem7 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 56 |
| store i64 8, ptr addrspace(1) %getElem7, align 8 |
| ret void |
| } |
| |
| ; Stores <2 x i32> vectors at slots 0, 1, and 3 (byte offsets 0, 8, 24); the gap at slot 2 is filled, giving one masked <8 x i32> store with lanes 4 and 5 masked off. |
| define void @vectorElements(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @vectorElements( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 poison, i32 poison, i32 7, i32 8>, ptr addrspace(1) align 32 [[OUT]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true>) |
| ; CHECK-NEXT: ret void |
| ; |
| store <2 x i32> <i32 1, i32 2>, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store <2 x i32> <i32 3, i32 4>, ptr addrspace(1) %getElem1, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store <2 x i32> <i32 7, i32 8>, ptr addrspace(1) %getElem3, align 8 |
| ret void |
| } |
| |
| ; Stores <2 x i64> vectors at slots 0, 1, and 3 (byte offsets 0, 16, 48). Slot 2 should not end up filled because 8xi64 is not legal; slots 0 and 1 merge into a plain <4 x i64> store and slot 3 stays separate. |
| define void @vectorElements64(ptr addrspace(1) %in) { |
| ; CHECK-LABEL: define void @vectorElements64( |
| ; CHECK-SAME: ptr addrspace(1) [[IN:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store <4 x i64> <i64 1, i64 2, i64 3, i64 4>, ptr addrspace(1) [[IN]], align 32 |
| ; CHECK-NEXT: [[GETELEM1:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[IN]], i32 48 |
| ; CHECK-NEXT: store <2 x i64> <i64 7, i64 8>, ptr addrspace(1) [[GETELEM1]], align 16 |
| ; CHECK-NEXT: ret void |
| ; |
| store <2 x i64> <i64 1, i64 2>, ptr addrspace(1) %in, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 16 |
| store <2 x i64> <i64 3, i64 4>, ptr addrspace(1) %getElem1, align 16 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 48 |
| store <2 x i64> <i64 7, i64 8>, ptr addrspace(1) %getElem3, align 16 |
| ret void |
| } |
| |
| ; Stores i64 elements 0, 1, and 2; the chain is extended with element 3, giving one masked <4 x i64> store with the last lane masked off. |
| define void @extendStores(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @extendStores( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 1, i64 2, i64 3, i64 poison>, ptr addrspace(1) align 32 [[OUT]], <4 x i1> <i1 true, i1 true, i1 true, i1 false>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i64 2, ptr addrspace(1) %getElem1, align 8 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i64 3, ptr addrspace(1) %getElem2, align 8 |
| ret void |
| } |
| |
| ; Stores i32 elements 0 through 4; the chain is extended with elements 5, 6, and 7, giving one masked <8 x i32> store with the last three lanes masked off. |
| define void @extendStores8xi32(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @extendStores8xi32( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 poison, i32 poison, i32 poison>, ptr addrspace(1) align 32 [[OUT]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>) |
| ; CHECK-NEXT: ret void |
| ; |
| store i32 1, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 4 |
| store i32 2, ptr addrspace(1) %getElem1, align 4 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i32 3, ptr addrspace(1) %getElem2, align 4 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i32 4, ptr addrspace(1) %getElem3, align 4 |
| %getElem4 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i32 5, ptr addrspace(1) %getElem4, align 4 |
| ret void |
| } |
| |
| ; Loads i32 elements 0 through 4 from %in and stores them to %out; both chains are extended with elements 5, 6, and 7, giving one masked <8 x i32> load and one masked <8 x i32> store. |
| define void @extendStoresFromLoads8xi32(ptr addrspace(1) %in, ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @extendStoresFromLoads8xi32( |
| ; CHECK-SAME: ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 32 [[IN]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>, <8 x i32> poison) |
| ; CHECK-NEXT: [[LOAD05:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[LOAD16:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[LOAD27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 |
| ; CHECK-NEXT: [[LOAD38:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 |
| ; CHECK-NEXT: [[LOAD49:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 |
| ; CHECK-NEXT: [[EXTENDLOAD10:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 |
| ; CHECK-NEXT: [[EXTENDLOAD211:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 |
| ; CHECK-NEXT: [[EXTENDLOAD412:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[LOAD05]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[LOAD16]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[LOAD27]], i32 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[LOAD38]], i32 3 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[LOAD49]], i32 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 poison, i32 5 |
| ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 poison, i32 6 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 poison, i32 7 |
| ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP9]], ptr addrspace(1) align 32 [[OUT]], <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false>) |
| ; CHECK-NEXT: ret void |
| ; |
| %load0 = load i32, ptr addrspace(1) %in, align 32 |
| %loadGetElem1 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 4 |
| %load1 = load i32, ptr addrspace(1) %loadGetElem1, align 4 |
| %loadGetElem2 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 8 |
| %load2 = load i32, ptr addrspace(1) %loadGetElem2, align 4 |
| %loadGetElem3 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 12 |
| %load3 = load i32, ptr addrspace(1) %loadGetElem3, align 4 |
| %loadGetElem4 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 16 |
| %load4 = load i32, ptr addrspace(1) %loadGetElem4, align 4 |
| |
| store i32 %load0, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 4 |
| store i32 %load1, ptr addrspace(1) %getElem1, align 4 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i32 %load2, ptr addrspace(1) %getElem2, align 4 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i32 %load3, ptr addrspace(1) %getElem3, align 4 |
| %getElem4 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i32 %load4, ptr addrspace(1) %getElem4, align 4 |
| ret void |
| } |
| |
| ; Loads i32 elements 0, 1, 3, and 4 from %in and stores them to %out; element 2 is gap filled and elements 5, 6, and 7 are extended in both the masked <8 x i32> load and the masked <8 x i32> store. |
| define void @extendAndGapFillStoresFromLoads8xi32(ptr addrspace(1) %in, ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @extendAndGapFillStoresFromLoads8xi32( |
| ; CHECK-SAME: ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p1(ptr addrspace(1) align 32 [[IN]], <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false>, <8 x i32> poison) |
| ; CHECK-NEXT: [[LOAD05:%.*]] = extractelement <8 x i32> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[LOAD16:%.*]] = extractelement <8 x i32> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[LOAD27:%.*]] = extractelement <8 x i32> [[TMP1]], i32 2 |
| ; CHECK-NEXT: [[LOAD38:%.*]] = extractelement <8 x i32> [[TMP1]], i32 3 |
| ; CHECK-NEXT: [[LOAD49:%.*]] = extractelement <8 x i32> [[TMP1]], i32 4 |
| ; CHECK-NEXT: [[EXTENDLOAD10:%.*]] = extractelement <8 x i32> [[TMP1]], i32 5 |
| ; CHECK-NEXT: [[EXTENDLOAD211:%.*]] = extractelement <8 x i32> [[TMP1]], i32 6 |
| ; CHECK-NEXT: [[EXTENDLOAD412:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7 |
| ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[LOAD05]], i32 0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[LOAD16]], i32 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 poison, i32 2 |
| ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[LOAD38]], i32 3 |
| ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[LOAD49]], i32 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 poison, i32 5 |
| ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 poison, i32 6 |
| ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 poison, i32 7 |
| ; CHECK-NEXT: call void @llvm.masked.store.v8i32.p1(<8 x i32> [[TMP9]], ptr addrspace(1) align 32 [[OUT]], <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false>) |
| ; CHECK-NEXT: ret void |
| ; |
| %load0 = load i32, ptr addrspace(1) %in, align 32 |
| %loadGetElem1 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 4 |
| %load1 = load i32, ptr addrspace(1) %loadGetElem1, align 4 |
| %loadGetElem3 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 12 |
| %load3 = load i32, ptr addrspace(1) %loadGetElem3, align 4 |
| %loadGetElem4 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 16 |
| %load4 = load i32, ptr addrspace(1) %loadGetElem4, align 4 |
| |
| store i32 %load0, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 4 |
| store i32 %load1, ptr addrspace(1) %getElem1, align 4 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i32 %load3, ptr addrspace(1) %getElem3, align 4 |
| %getElem4 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i32 %load4, ptr addrspace(1) %getElem4, align 4 |
| ret void |
| } |
| |
| |
| ; NEGATIVE TESTS |
| |
| ; Stores i64 elements 0, 1, and 3 through an addrspace(3) pointer: wrong address space, so no gap filling. Elements 0 and 1 still merge into a plain <2 x i64> store. |
| define void @singleGapWrongAddrSpace(ptr addrspace(3) %out) { |
| ; CHECK-LABEL: define void @singleGapWrongAddrSpace( |
| ; CHECK-SAME: ptr addrspace(3) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store <2 x i64> <i64 1, i64 2>, ptr addrspace(3) [[OUT]], align 32 |
| ; CHECK-NEXT: [[GETELEM3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[OUT]], i32 24 |
| ; CHECK-NEXT: store i64 4, ptr addrspace(3) [[GETELEM3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(3) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(3) %out, i32 8 |
| store i64 2, ptr addrspace(3) %getElem1, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(3) %out, i32 24 |
| store i64 4, ptr addrspace(3) %getElem3, align 8 |
| ret void |
| } |
| |
| ; The first store is only 16-byte aligned, which is not enough alignment for a masked store, but elements 0 and 1 are still vectorized into the smaller <2 x i64> store. |
| define void @singleGapMisaligned(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @singleGapMisaligned( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store <2 x i64> <i64 1, i64 2>, ptr addrspace(1) [[OUT]], align 16 |
| ; CHECK-NEXT: [[GETELEM3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 24 |
| ; CHECK-NEXT: store i64 4, ptr addrspace(1) [[GETELEM3]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| store i64 1, ptr addrspace(1) %out, align 16 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i64 2, ptr addrspace(1) %getElem1, align 8 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i64 4, ptr addrspace(1) %getElem3, align 8 |
| ret void |
| } |
| |
| ; i32 elements 0, 2, and 3 span only 16 bytes, which is not enough bytes to meet the minimum masked store size for the target, so the gap at element 1 is not filled; elements 2 and 3 merge into a <2 x i32> store. |
| define void @singleGap4xi32(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @singleGap4xi32( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store i32 1, ptr addrspace(1) [[OUT]], align 32 |
| ; CHECK-NEXT: [[GETELEM2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 8 |
| ; CHECK-NEXT: store <2 x i32> <i32 3, i32 4>, ptr addrspace(1) [[GETELEM2]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| store i32 1, ptr addrspace(1) %out, align 32 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i32 3, ptr addrspace(1) %getElem2, align 4 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i32 4, ptr addrspace(1) %getElem3, align 4 |
| ret void |
| } |
| |
| ; Stores i32 elements 0, 1, 2, 5, 6, and 7. Elements 3 and 4 don't get filled because the heuristic |
| ; only fills 2-element gaps that are in the middle of a multiple of 4; only plain <2 x i32> and scalar stores remain. |
| define void @gapInWrongLocation(ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @gapInWrongLocation( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr addrspace(1) [[OUT]], align 32 |
| ; CHECK-NEXT: [[GETELEM2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 8 |
| ; CHECK-NEXT: store i32 3, ptr addrspace(1) [[GETELEM2]], align 8 |
| ; CHECK-NEXT: [[GETELEM5:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 20 |
| ; CHECK-NEXT: store i32 5, ptr addrspace(1) [[GETELEM5]], align 4 |
| ; CHECK-NEXT: [[GETELEM6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 24 |
| ; CHECK-NEXT: store <2 x i32> <i32 6, i32 7>, ptr addrspace(1) [[GETELEM6]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| store i32 1, ptr addrspace(1) %out, align 32 |
| %getElem1 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 4 |
| store i32 2, ptr addrspace(1) %getElem1, align 4 |
| %getElem2 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i32 3, ptr addrspace(1) %getElem2, align 4 |
| %getElem5 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 20 |
| store i32 5, ptr addrspace(1) %getElem5, align 4 |
| %getElem6 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i32 6, ptr addrspace(1) %getElem6, align 4 |
| %getElem7 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 28 |
| store i32 7, ptr addrspace(1) %getElem7, align 4 |
| ret void |
| } |
| |
| ; This test has 32 bytes of i8s with a 2-element gap in the middle of each 4-byte chunk. |
| ; i8s are not supported by masked stores on the target, so the stores will not be vectorized. |
| ; The loads, on the other hand, get gap filled into a single masked <32 x i8> load. |
| define void @cantMaski8(ptr addrspace(1) %in, ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @cantMaski8( |
| ; CHECK-SAME: ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.masked.load.v32i8.p1(ptr addrspace(1) align 32 [[IN]], <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <32 x i8> poison) |
| ; CHECK-NEXT: [[LOAD031:%.*]] = extractelement <32 x i8> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[GAPFILL32:%.*]] = extractelement <32 x i8> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[GAPFILL233:%.*]] = extractelement <32 x i8> [[TMP1]], i32 2 |
| ; CHECK-NEXT: [[LOAD334:%.*]] = extractelement <32 x i8> [[TMP1]], i32 3 |
| ; CHECK-NEXT: [[LOAD435:%.*]] = extractelement <32 x i8> [[TMP1]], i32 4 |
| ; CHECK-NEXT: [[GAPFILL436:%.*]] = extractelement <32 x i8> [[TMP1]], i32 5 |
| ; CHECK-NEXT: [[GAPFILL637:%.*]] = extractelement <32 x i8> [[TMP1]], i32 6 |
| ; CHECK-NEXT: [[LOAD738:%.*]] = extractelement <32 x i8> [[TMP1]], i32 7 |
| ; CHECK-NEXT: [[LOAD839:%.*]] = extractelement <32 x i8> [[TMP1]], i32 8 |
| ; CHECK-NEXT: [[GAPFILL840:%.*]] = extractelement <32 x i8> [[TMP1]], i32 9 |
| ; CHECK-NEXT: [[GAPFILL1041:%.*]] = extractelement <32 x i8> [[TMP1]], i32 10 |
| ; CHECK-NEXT: [[LOAD1142:%.*]] = extractelement <32 x i8> [[TMP1]], i32 11 |
| ; CHECK-NEXT: [[LOAD1243:%.*]] = extractelement <32 x i8> [[TMP1]], i32 12 |
| ; CHECK-NEXT: [[GAPFILL1244:%.*]] = extractelement <32 x i8> [[TMP1]], i32 13 |
| ; CHECK-NEXT: [[GAPFILL1445:%.*]] = extractelement <32 x i8> [[TMP1]], i32 14 |
| ; CHECK-NEXT: [[LOAD1546:%.*]] = extractelement <32 x i8> [[TMP1]], i32 15 |
| ; CHECK-NEXT: [[LOAD1647:%.*]] = extractelement <32 x i8> [[TMP1]], i32 16 |
| ; CHECK-NEXT: [[GAPFILL1648:%.*]] = extractelement <32 x i8> [[TMP1]], i32 17 |
| ; CHECK-NEXT: [[GAPFILL1849:%.*]] = extractelement <32 x i8> [[TMP1]], i32 18 |
| ; CHECK-NEXT: [[LOAD1950:%.*]] = extractelement <32 x i8> [[TMP1]], i32 19 |
| ; CHECK-NEXT: [[LOAD2051:%.*]] = extractelement <32 x i8> [[TMP1]], i32 20 |
| ; CHECK-NEXT: [[GAPFILL2052:%.*]] = extractelement <32 x i8> [[TMP1]], i32 21 |
| ; CHECK-NEXT: [[GAPFILL2253:%.*]] = extractelement <32 x i8> [[TMP1]], i32 22 |
| ; CHECK-NEXT: [[LOAD2354:%.*]] = extractelement <32 x i8> [[TMP1]], i32 23 |
| ; CHECK-NEXT: [[LOAD2455:%.*]] = extractelement <32 x i8> [[TMP1]], i32 24 |
| ; CHECK-NEXT: [[GAPFILL2456:%.*]] = extractelement <32 x i8> [[TMP1]], i32 25 |
| ; CHECK-NEXT: [[GAPFILL2657:%.*]] = extractelement <32 x i8> [[TMP1]], i32 26 |
| ; CHECK-NEXT: [[LOAD2758:%.*]] = extractelement <32 x i8> [[TMP1]], i32 27 |
| ; CHECK-NEXT: [[LOAD2859:%.*]] = extractelement <32 x i8> [[TMP1]], i32 28 |
| ; CHECK-NEXT: [[GAPFILL2860:%.*]] = extractelement <32 x i8> [[TMP1]], i32 29 |
| ; CHECK-NEXT: [[GAPFILL3061:%.*]] = extractelement <32 x i8> [[TMP1]], i32 30 |
| ; CHECK-NEXT: [[LOAD3162:%.*]] = extractelement <32 x i8> [[TMP1]], i32 31 |
| ; CHECK-NEXT: store i8 [[LOAD031]], ptr addrspace(1) [[OUT]], align 32 |
| ; CHECK-NEXT: [[OUTELEM3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 3 |
| ; CHECK-NEXT: store i8 [[LOAD334]], ptr addrspace(1) [[OUTELEM3]], align 1 |
| ; CHECK-NEXT: [[OUTELEM4:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 4 |
| ; CHECK-NEXT: store i8 [[LOAD435]], ptr addrspace(1) [[OUTELEM4]], align 4 |
| ; CHECK-NEXT: [[OUTELEM7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 7 |
| ; CHECK-NEXT: store i8 [[LOAD738]], ptr addrspace(1) [[OUTELEM7]], align 1 |
| ; CHECK-NEXT: [[OUTELEM8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 8 |
| ; CHECK-NEXT: store i8 [[LOAD839]], ptr addrspace(1) [[OUTELEM8]], align 8 |
| ; CHECK-NEXT: [[OUTELEM11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 11 |
| ; CHECK-NEXT: store i8 [[LOAD1142]], ptr addrspace(1) [[OUTELEM11]], align 1 |
| ; CHECK-NEXT: [[OUTELEM12:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 12 |
| ; CHECK-NEXT: store i8 [[LOAD1243]], ptr addrspace(1) [[OUTELEM12]], align 4 |
| ; CHECK-NEXT: [[OUTELEM15:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 15 |
| ; CHECK-NEXT: store i8 [[LOAD1546]], ptr addrspace(1) [[OUTELEM15]], align 1 |
| ; CHECK-NEXT: [[OUTELEM16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 16 |
| ; CHECK-NEXT: store i8 [[LOAD1647]], ptr addrspace(1) [[OUTELEM16]], align 16 |
| ; CHECK-NEXT: [[OUTELEM19:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 19 |
| ; CHECK-NEXT: store i8 [[LOAD1950]], ptr addrspace(1) [[OUTELEM19]], align 1 |
| ; CHECK-NEXT: [[OUTELEM20:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 20 |
| ; CHECK-NEXT: store i8 [[LOAD2051]], ptr addrspace(1) [[OUTELEM20]], align 4 |
| ; CHECK-NEXT: [[OUTELEM23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 23 |
| ; CHECK-NEXT: store i8 [[LOAD2354]], ptr addrspace(1) [[OUTELEM23]], align 1 |
| ; CHECK-NEXT: [[OUTELEM24:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 24 |
| ; CHECK-NEXT: store i8 [[LOAD2455]], ptr addrspace(1) [[OUTELEM24]], align 8 |
| ; CHECK-NEXT: [[OUTELEM27:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 27 |
| ; CHECK-NEXT: store i8 [[LOAD2758]], ptr addrspace(1) [[OUTELEM27]], align 1 |
| ; CHECK-NEXT: [[OUTELEM28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 28 |
| ; CHECK-NEXT: store i8 [[LOAD2859]], ptr addrspace(1) [[OUTELEM28]], align 4 |
| ; CHECK-NEXT: [[OUTELEM31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 31 |
| ; CHECK-NEXT: store i8 [[LOAD3162]], ptr addrspace(1) [[OUTELEM31]], align 1 |
| ; CHECK-NEXT: ret void |
| ; |
| %load0 = load i8, ptr addrspace(1) %in, align 32 |
| %getElem3 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 3 |
| %load3 = load i8, ptr addrspace(1) %getElem3, align 1 |
| %getElem4 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 4 |
| %load4 = load i8, ptr addrspace(1) %getElem4, align 4 |
| %getElem7 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 7 |
| %load7 = load i8, ptr addrspace(1) %getElem7, align 1 |
| %getElem8 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 8 |
| %load8 = load i8, ptr addrspace(1) %getElem8, align 8 |
| %getElem11 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 11 |
| %load11 = load i8, ptr addrspace(1) %getElem11, align 1 |
| %getElem12 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 12 |
| %load12 = load i8, ptr addrspace(1) %getElem12, align 4 |
| %getElem15 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 15 |
| %load15 = load i8, ptr addrspace(1) %getElem15, align 1 |
| %getElem16 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 16 |
| %load16 = load i8, ptr addrspace(1) %getElem16, align 16 |
| %getElem19 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 19 |
| %load19 = load i8, ptr addrspace(1) %getElem19, align 1 |
| %getElem20 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 20 |
| %load20 = load i8, ptr addrspace(1) %getElem20, align 4 |
| %getElem23 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 23 |
| %load23 = load i8, ptr addrspace(1) %getElem23, align 1 |
| %getElem24 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 24 |
| %load24 = load i8, ptr addrspace(1) %getElem24, align 8 |
| %getElem27 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 27 |
| %load27 = load i8, ptr addrspace(1) %getElem27, align 1 |
| %getElem28 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 28 |
| %load28 = load i8, ptr addrspace(1) %getElem28, align 4 |
| %getElem31 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 31 |
| %load31 = load i8, ptr addrspace(1) %getElem31, align 1 |
| |
| store i8 %load0, ptr addrspace(1) %out, align 32 |
| %outElem3 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 3 |
| store i8 %load3, ptr addrspace(1) %outElem3, align 1 |
| %outElem4 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 4 |
| store i8 %load4, ptr addrspace(1) %outElem4, align 4 |
| %outElem7 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 7 |
| store i8 %load7, ptr addrspace(1) %outElem7, align 1 |
| %outElem8 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i8 %load8, ptr addrspace(1) %outElem8, align 8 |
| %outElem11 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 11 |
| store i8 %load11, ptr addrspace(1) %outElem11, align 1 |
| %outElem12 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 12 |
| store i8 %load12, ptr addrspace(1) %outElem12, align 4 |
| %outElem15 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 15 |
| store i8 %load15, ptr addrspace(1) %outElem15, align 1 |
| %outElem16 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i8 %load16, ptr addrspace(1) %outElem16, align 16 |
| %outElem19 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 19 |
| store i8 %load19, ptr addrspace(1) %outElem19, align 1 |
| %outElem20 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 20 |
| store i8 %load20, ptr addrspace(1) %outElem20, align 4 |
| %outElem23 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 23 |
| store i8 %load23, ptr addrspace(1) %outElem23, align 1 |
| %outElem24 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i8 %load24, ptr addrspace(1) %outElem24, align 8 |
| %outElem27 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 27 |
| store i8 %load27, ptr addrspace(1) %outElem27, align 1 |
| %outElem28 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 28 |
| store i8 %load28, ptr addrspace(1) %outElem28, align 4 |
| %outElem31 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 31 |
| store i8 %load31, ptr addrspace(1) %outElem31, align 1 |
| |
| ret void |
| } |
| |
| ; This test has 32 bytes of i16s with a 2-element gap in the middle of each 4-element chunk. |
| ; i16s are not supported by masked stores on the target, so the stores will not be vectorized. |
| ; The loads, on the other hand, get gap filled into a single masked <16 x i16> load. |
| define void @cantMaski16(ptr addrspace(1) %in, ptr addrspace(1) %out) { |
| ; CHECK-LABEL: define void @cantMaski16( |
| ; CHECK-SAME: ptr addrspace(1) [[IN:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.masked.load.v16i16.p1(ptr addrspace(1) align 32 [[IN]], <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true>, <16 x i16> poison) |
| ; CHECK-NEXT: [[LOAD015:%.*]] = extractelement <16 x i16> [[TMP1]], i32 0 |
| ; CHECK-NEXT: [[GAPFILL16:%.*]] = extractelement <16 x i16> [[TMP1]], i32 1 |
| ; CHECK-NEXT: [[GAPFILL217:%.*]] = extractelement <16 x i16> [[TMP1]], i32 2 |
| ; CHECK-NEXT: [[LOAD318:%.*]] = extractelement <16 x i16> [[TMP1]], i32 3 |
| ; CHECK-NEXT: [[LOAD419:%.*]] = extractelement <16 x i16> [[TMP1]], i32 4 |
| ; CHECK-NEXT: [[GAPFILL420:%.*]] = extractelement <16 x i16> [[TMP1]], i32 5 |
| ; CHECK-NEXT: [[GAPFILL621:%.*]] = extractelement <16 x i16> [[TMP1]], i32 6 |
| ; CHECK-NEXT: [[LOAD722:%.*]] = extractelement <16 x i16> [[TMP1]], i32 7 |
| ; CHECK-NEXT: [[LOAD823:%.*]] = extractelement <16 x i16> [[TMP1]], i32 8 |
| ; CHECK-NEXT: [[GAPFILL824:%.*]] = extractelement <16 x i16> [[TMP1]], i32 9 |
| ; CHECK-NEXT: [[GAPFILL1025:%.*]] = extractelement <16 x i16> [[TMP1]], i32 10 |
| ; CHECK-NEXT: [[LOAD1126:%.*]] = extractelement <16 x i16> [[TMP1]], i32 11 |
| ; CHECK-NEXT: [[LOAD1227:%.*]] = extractelement <16 x i16> [[TMP1]], i32 12 |
| ; CHECK-NEXT: [[GAPFILL1228:%.*]] = extractelement <16 x i16> [[TMP1]], i32 13 |
| ; CHECK-NEXT: [[GAPFILL1429:%.*]] = extractelement <16 x i16> [[TMP1]], i32 14 |
| ; CHECK-NEXT: [[LOAD1530:%.*]] = extractelement <16 x i16> [[TMP1]], i32 15 |
| ; CHECK-NEXT: store i16 [[LOAD015]], ptr addrspace(1) [[OUT]], align 32 |
| ; CHECK-NEXT: [[OUTELEM6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 6 |
| ; CHECK-NEXT: store i16 [[LOAD318]], ptr addrspace(1) [[OUTELEM6]], align 2 |
| ; CHECK-NEXT: [[OUTELEM8:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 8 |
| ; CHECK-NEXT: store i16 [[LOAD419]], ptr addrspace(1) [[OUTELEM8]], align 8 |
| ; CHECK-NEXT: [[OUTELEM14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 14 |
| ; CHECK-NEXT: store i16 [[LOAD722]], ptr addrspace(1) [[OUTELEM14]], align 2 |
| ; CHECK-NEXT: [[OUTELEM16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 16 |
| ; CHECK-NEXT: store i16 [[LOAD823]], ptr addrspace(1) [[OUTELEM16]], align 16 |
| ; CHECK-NEXT: [[OUTELEM22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 22 |
| ; CHECK-NEXT: store i16 [[LOAD1126]], ptr addrspace(1) [[OUTELEM22]], align 2 |
| ; CHECK-NEXT: [[OUTELEM24:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 24 |
| ; CHECK-NEXT: store i16 [[LOAD1227]], ptr addrspace(1) [[OUTELEM24]], align 8 |
| ; CHECK-NEXT: [[OUTELEM30:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[OUT]], i32 30 |
| ; CHECK-NEXT: store i16 [[LOAD1530]], ptr addrspace(1) [[OUTELEM30]], align 2 |
| ; CHECK-NEXT: ret void |
| ; |
| %load0 = load i16, ptr addrspace(1) %in, align 32 |
| %getElem6 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 6 |
| %load3 = load i16, ptr addrspace(1) %getElem6, align 2 |
| %getElem8 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 8 |
| %load4 = load i16, ptr addrspace(1) %getElem8, align 8 |
| %getElem14 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 14 |
| %load7 = load i16, ptr addrspace(1) %getElem14, align 2 |
| %getElem16 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 16 |
| %load8 = load i16, ptr addrspace(1) %getElem16, align 16 |
| %getElem22 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 22 |
| %load11 = load i16, ptr addrspace(1) %getElem22, align 2 |
| %getElem24 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 24 |
| %load12 = load i16, ptr addrspace(1) %getElem24, align 8 |
| %getElem30 = getelementptr inbounds i8, ptr addrspace(1) %in, i32 30 |
| %load15 = load i16, ptr addrspace(1) %getElem30, align 2 |
| |
| store i16 %load0, ptr addrspace(1) %out, align 32 |
| %outElem6 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 6 |
| store i16 %load3, ptr addrspace(1) %outElem6, align 2 |
| %outElem8 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 8 |
| store i16 %load4, ptr addrspace(1) %outElem8, align 8 |
| %outElem14 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 14 |
| store i16 %load7, ptr addrspace(1) %outElem14, align 2 |
| %outElem16 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 16 |
| store i16 %load8, ptr addrspace(1) %outElem16, align 16 |
| %outElem22 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 22 |
| store i16 %load11, ptr addrspace(1) %outElem22, align 2 |
| %outElem24 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 24 |
| store i16 %load12, ptr addrspace(1) %outElem24, align 8 |
| %outElem30 = getelementptr inbounds i8, ptr addrspace(1) %out, i32 30 |
| store i16 %load15, ptr addrspace(1) %outElem30, align 2 |
| |
| ret void |
| } |