| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| |
| ; RUN: llc -mtriple=thumbv8m.base-arm-none-eabi < %s | FileCheck %s |
| |
| define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) { |
| ; CHECK-LABEL: arm_q15_to_q31: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} |
| ; CHECK-NEXT: push {r4, r5, r6, r7, lr} |
| ; CHECK-NEXT: .pad #8 |
| ; CHECK-NEXT: sub sp, #8 |
| ; CHECK-NEXT: mov r7, r2 |
| ; CHECK-NEXT: lsrs r3, r2, #2 |
| ; CHECK-NEXT: beq .LBB0_6 |
| ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader |
| ; CHECK-NEXT: movs r5, #3 |
| ; CHECK-NEXT: ands r5, r3 |
| ; CHECK-NEXT: subs r2, r3, #1 |
| ; CHECK-NEXT: cbz r5, .LBB0_4 |
| ; CHECK-NEXT: @ %bb.2: @ %while.body.prol |
| ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill |
| ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: ldrh r7, [r0, #2] |
| ; CHECK-NEXT: ldrh r4, [r0, #4] |
| ; CHECK-NEXT: ldrh r6, [r0, #6] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r7, r7, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: stm r1!, {r2, r7} |
| ; CHECK-NEXT: str r4, [r1] |
| ; CHECK-NEXT: str r6, [r1, #4] |
| ; CHECK-NEXT: subs r1, #8 |
| ; CHECK-NEXT: cmp r5, #1 |
| ; CHECK-NEXT: bne .LBB0_11 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: adds r1, #16 |
| ; CHECK-NEXT: adds r0, #8 |
| ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload |
| ; CHECK-NEXT: mov r3, r2 |
| ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload |
| ; CHECK-NEXT: .LBB0_4: @ %while.body.prol.loopexit |
| ; CHECK-NEXT: cmp r2, #3 |
| ; CHECK-NEXT: blo .LBB0_6 |
| ; CHECK-NEXT: .LBB0_5: @ %while.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: ldrh r4, [r0, #2] |
| ; CHECK-NEXT: ldrh r5, [r0, #4] |
| ; CHECK-NEXT: ldrh r6, [r0, #6] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #12] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #8] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #4] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1] |
| ; CHECK-NEXT: ldrh r2, [r0, #8] |
| ; CHECK-NEXT: ldrh r4, [r0, #10] |
| ; CHECK-NEXT: ldrh r5, [r0, #12] |
| ; CHECK-NEXT: ldrh r6, [r0, #14] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #28] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #24] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #20] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #16] |
| ; CHECK-NEXT: ldrh r2, [r0, #16] |
| ; CHECK-NEXT: ldrh r4, [r0, #18] |
| ; CHECK-NEXT: ldrh r5, [r0, #20] |
| ; CHECK-NEXT: ldrh r6, [r0, #22] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #44] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #40] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #36] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #32] |
| ; CHECK-NEXT: ldrh r2, [r0, #24] |
| ; CHECK-NEXT: ldrh r4, [r0, #26] |
| ; CHECK-NEXT: ldrh r5, [r0, #28] |
| ; CHECK-NEXT: ldrh r6, [r0, #30] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #60] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #56] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #52] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #48] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: adds r0, #32 |
| ; CHECK-NEXT: subs r3, r3, #4 |
| ; CHECK-NEXT: bne .LBB0_5 |
| ; CHECK-NEXT: .LBB0_6: @ %while.end |
| ; CHECK-NEXT: movs r2, #3 |
| ; CHECK-NEXT: ands r7, r2 |
| ; CHECK-NEXT: beq .LBB0_10 |
| ; CHECK-NEXT: @ %bb.7: @ %while.body12 |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1] |
| ; CHECK-NEXT: cmp r7, #1 |
| ; CHECK-NEXT: beq .LBB0_10 |
| ; CHECK-NEXT: @ %bb.8: @ %while.body12.1 |
| ; CHECK-NEXT: ldrh r2, [r0, #2] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #4] |
| ; CHECK-NEXT: cmp r7, #2 |
| ; CHECK-NEXT: beq .LBB0_10 |
| ; CHECK-NEXT: @ %bb.9: @ %while.body12.2 |
| ; CHECK-NEXT: ldrh r0, [r0, #4] |
| ; CHECK-NEXT: lsls r0, r0, #16 |
| ; CHECK-NEXT: str r0, [r1, #8] |
| ; CHECK-NEXT: .LBB0_10: @ %while.end17 |
| ; CHECK-NEXT: add sp, #8 |
| ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} |
| ; CHECK-NEXT: .LBB0_11: @ %while.body.prol.1 |
| ; CHECK-NEXT: ldrh r2, [r0, #8] |
| ; CHECK-NEXT: ldrh r4, [r0, #10] |
| ; CHECK-NEXT: ldrh r6, [r0, #12] |
| ; CHECK-NEXT: ldrh r7, [r0, #14] |
| ; CHECK-NEXT: lsls r7, r7, #16 |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #16] |
| ; CHECK-NEXT: str r4, [r1, #20] |
| ; CHECK-NEXT: str r6, [r1, #24] |
| ; CHECK-NEXT: str r7, [r1, #28] |
| ; CHECK-NEXT: cmp r5, #2 |
| ; CHECK-NEXT: bne .LBB0_13 |
| ; CHECK-NEXT: @ %bb.12: |
| ; CHECK-NEXT: subs r3, r3, #2 |
| ; CHECK-NEXT: adds r1, #32 |
| ; CHECK-NEXT: adds r0, #16 |
| ; CHECK-NEXT: b .LBB0_14 |
| ; CHECK-NEXT: .LBB0_13: @ %while.body.prol.2 |
| ; CHECK-NEXT: ldrh r2, [r0, #16] |
| ; CHECK-NEXT: ldrh r4, [r0, #18] |
| ; CHECK-NEXT: ldrh r5, [r0, #20] |
| ; CHECK-NEXT: ldrh r6, [r0, #22] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: mov r7, r1 |
| ; CHECK-NEXT: adds r7, #32 |
| ; CHECK-NEXT: stm r7!, {r2, r4, r5, r6} |
| ; CHECK-NEXT: subs r3, r3, #3 |
| ; CHECK-NEXT: adds r1, #48 |
| ; CHECK-NEXT: adds r0, #24 |
| ; CHECK-NEXT: .LBB0_14: @ %while.body.prol.loopexit |
| ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload |
| ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload |
| ; CHECK-NEXT: cmp r2, #3 |
| ; CHECK-NEXT: bhs .LBB0_5 |
| ; CHECK-NEXT: b .LBB0_6 |
| entry: |
| %cmp.not19 = icmp ult i32 %blockSize, 4 |
| br i1 %cmp.not19, label %while.end, label %while.body.preheader |
| |
| while.body.preheader: ; preds = %entry -- %shr = blockSize >> 2 four-sample groups; %xtraiter = %shr & 3 of them are peeled into the .prol blocks ahead of the 4x-unrolled %while.body |
| %shr = lshr i32 %blockSize, 2 |
| %0 = add nsw i32 %shr, -1 |
| %xtraiter = and i32 %shr, 3 |
| %lcmp.mod.not = icmp eq i32 %xtraiter, 0 |
| br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol |
| |
| while.body.prol: ; preds = %while.body.preheader -- 1st peeled group: pSrc[0..3] (i16) are each extended to i32, shifted left by 16 and stored to pDst[0..3]; peeling stops here when %xtraiter == 1 |
| %pIn.0.val.prol = load i16, ptr %pSrc, align 2 |
| %1 = getelementptr i8, ptr %pSrc, i32 2 |
| %pIn.0.val13.prol = load i16, ptr %1, align 2 |
| %conv.i.prol = sext i16 %pIn.0.val13.prol to i32 |
| %shl.i.prol = shl nsw i32 %conv.i.prol, 16 |
| %conv22.i.prol = zext i16 %pIn.0.val.prol to i32 |
| %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4 |
| %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2 |
| %add.ptr3.val.prol = load i16, ptr %add.ptr3.prol, align 2 |
| %2 = getelementptr i16, ptr %pSrc, i32 3 |
| %add.ptr3.val14.prol = load i16, ptr %2, align 2 |
| %conv.i15.prol = sext i16 %add.ptr3.val14.prol to i32 |
| %shl.i16.prol = shl nsw i32 %conv.i15.prol, 16 |
| %conv22.i17.prol = zext i16 %add.ptr3.val.prol to i32 |
| %shl.prol = shl nuw i32 %conv22.i.prol, 16 |
| %shl5.prol = shl nuw i32 %conv22.i17.prol, 16 |
| %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1 |
| store i32 %shl.prol, ptr %pDst, align 4 |
| %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2 |
| store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4 |
| %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3 |
| store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4 |
| %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4 |
| store i32 %shl.i16.prol, ptr %incdec.ptr8.prol, align 4 |
| %dec.prol = add nsw i32 %shr, -1 |
| %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1 |
| br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1 |
| |
| while.body.prol.1: ; preds = %while.body.prol -- 2nd peeled group: pSrc[4..7] -> pDst[4..7] with the same extend-and-shift; peeling stops here when %xtraiter == 2 |
| %pIn.0.val.prol.1 = load i16, ptr %add.ptr2.prol, align 2 |
| %3 = getelementptr i16, ptr %pSrc, i32 5 |
| %pIn.0.val13.prol.1 = load i16, ptr %3, align 2 |
| %conv.i.prol.1 = sext i16 %pIn.0.val13.prol.1 to i32 |
| %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16 |
| %conv22.i.prol.1 = zext i16 %pIn.0.val.prol.1 to i32 |
| %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8 |
| %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6 |
| %add.ptr3.val.prol.1 = load i16, ptr %add.ptr3.prol.1, align 2 |
| %4 = getelementptr i16, ptr %pSrc, i32 7 |
| %add.ptr3.val14.prol.1 = load i16, ptr %4, align 2 |
| %conv.i15.prol.1 = sext i16 %add.ptr3.val14.prol.1 to i32 |
| %shl.i16.prol.1 = shl nsw i32 %conv.i15.prol.1, 16 |
| %conv22.i17.prol.1 = zext i16 %add.ptr3.val.prol.1 to i32 |
| %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16 |
| %shl5.prol.1 = shl nuw i32 %conv22.i17.prol.1, 16 |
| %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5 |
| store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4 |
| %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6 |
| store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4 |
| %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7 |
| store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4 |
| %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8 |
| store i32 %shl.i16.prol.1, ptr %incdec.ptr8.prol.1, align 4 |
| %dec.prol.1 = add nsw i32 %shr, -2 |
| %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 |
| br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2 |
| |
| while.body.prol.2: ; preds = %while.body.prol.1 -- 3rd and last peeled group: pSrc[8..11] -> pDst[8..11]; always falls through to the loopexit block |
| %pIn.0.val.prol.2 = load i16, ptr %add.ptr2.prol.1, align 2 |
| %5 = getelementptr i16, ptr %pSrc, i32 9 |
| %pIn.0.val13.prol.2 = load i16, ptr %5, align 2 |
| %conv.i.prol.2 = sext i16 %pIn.0.val13.prol.2 to i32 |
| %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16 |
| %conv22.i.prol.2 = zext i16 %pIn.0.val.prol.2 to i32 |
| %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12 |
| %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10 |
| %add.ptr3.val.prol.2 = load i16, ptr %add.ptr3.prol.2, align 2 |
| %6 = getelementptr i16, ptr %pSrc, i32 11 |
| %add.ptr3.val14.prol.2 = load i16, ptr %6, align 2 |
| %conv.i15.prol.2 = sext i16 %add.ptr3.val14.prol.2 to i32 |
| %shl.i16.prol.2 = shl nsw i32 %conv.i15.prol.2, 16 |
| %conv22.i17.prol.2 = zext i16 %add.ptr3.val.prol.2 to i32 |
| %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16 |
| %shl5.prol.2 = shl nuw i32 %conv22.i17.prol.2, 16 |
| %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9 |
| store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4 |
| %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10 |
| store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4 |
| %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11 |
| store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4 |
| %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12 |
| store i32 %shl.i16.prol.2, ptr %incdec.ptr8.prol.2, align 4 |
| %dec.prol.2 = add nsw i32 %shr, -3 |
| br label %while.body.prol.loopexit |
| |
| while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader -- merges the advanced source/destination pointers and the remaining group count; goes straight to %while.end when %0 (= %shr - 1) is below 3, otherwise enters %while.body |
| %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] |
| %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] |
| %pDst.addr.022.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] |
| %blkCnt.021.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ] |
| %pIn.020.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] |
| %7 = icmp ult i32 %0, 3 |
| br i1 %7, label %while.end, label %while.body |
| |
| while.body: ; preds = %while.body.prol.loopexit, %while.body -- main loop, four groups per trip: %pIn.020[0..15] -> %pDst.addr.022[0..15], both pointers advance by 16 elements and %blkCnt.021 drops by 4 until it reaches 0 |
| %pDst.addr.022 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.022.unr, %while.body.prol.loopexit ] |
| %blkCnt.021 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.021.unr, %while.body.prol.loopexit ] |
| %pIn.020 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.020.unr, %while.body.prol.loopexit ] |
| %pIn.0.val = load i16, ptr %pIn.020, align 2 |
| %8 = getelementptr i8, ptr %pIn.020, i32 2 |
| %pIn.0.val13 = load i16, ptr %8, align 2 |
| %conv.i = sext i16 %pIn.0.val13 to i32 |
| %shl.i = shl nsw i32 %conv.i, 16 |
| %conv22.i = zext i16 %pIn.0.val to i32 |
| %add.ptr2 = getelementptr inbounds i16, ptr %pIn.020, i32 4 |
| %add.ptr3 = getelementptr inbounds i16, ptr %pIn.020, i32 2 |
| %add.ptr3.val = load i16, ptr %add.ptr3, align 2 |
| %9 = getelementptr i16, ptr %pIn.020, i32 3 |
| %add.ptr3.val14 = load i16, ptr %9, align 2 |
| %conv.i15 = sext i16 %add.ptr3.val14 to i32 |
| %shl.i16 = shl nsw i32 %conv.i15, 16 |
| %conv22.i17 = zext i16 %add.ptr3.val to i32 |
| %shl = shl nuw i32 %conv22.i, 16 |
| %shl5 = shl nuw i32 %conv22.i17, 16 |
| %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.022, i32 1 |
| store i32 %shl, ptr %pDst.addr.022, align 4 |
| %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 2 |
| store i32 %shl.i, ptr %incdec.ptr, align 4 |
| %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 3 |
| store i32 %shl5, ptr %incdec.ptr7, align 4 |
| %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 4 |
| store i32 %shl.i16, ptr %incdec.ptr8, align 4 |
| %pIn.0.val.1 = load i16, ptr %add.ptr2, align 2 |
| %10 = getelementptr i16, ptr %pIn.020, i32 5 |
| %pIn.0.val13.1 = load i16, ptr %10, align 2 |
| %conv.i.1 = sext i16 %pIn.0.val13.1 to i32 |
| %shl.i.1 = shl nsw i32 %conv.i.1, 16 |
| %conv22.i.1 = zext i16 %pIn.0.val.1 to i32 |
| %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.020, i32 8 |
| %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.020, i32 6 |
| %add.ptr3.val.1 = load i16, ptr %add.ptr3.1, align 2 |
| %11 = getelementptr i16, ptr %pIn.020, i32 7 |
| %add.ptr3.val14.1 = load i16, ptr %11, align 2 |
| %conv.i15.1 = sext i16 %add.ptr3.val14.1 to i32 |
| %shl.i16.1 = shl nsw i32 %conv.i15.1, 16 |
| %conv22.i17.1 = zext i16 %add.ptr3.val.1 to i32 |
| %shl.1 = shl nuw i32 %conv22.i.1, 16 |
| %shl5.1 = shl nuw i32 %conv22.i17.1, 16 |
| %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 5 |
| store i32 %shl.1, ptr %incdec.ptr9, align 4 |
| %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 6 |
| store i32 %shl.i.1, ptr %incdec.ptr.1, align 4 |
| %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 7 |
| store i32 %shl5.1, ptr %incdec.ptr7.1, align 4 |
| %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 8 |
| store i32 %shl.i16.1, ptr %incdec.ptr8.1, align 4 |
| %pIn.0.val.2 = load i16, ptr %add.ptr2.1, align 2 |
| %12 = getelementptr i16, ptr %pIn.020, i32 9 |
| %pIn.0.val13.2 = load i16, ptr %12, align 2 |
| %conv.i.2 = sext i16 %pIn.0.val13.2 to i32 |
| %shl.i.2 = shl nsw i32 %conv.i.2, 16 |
| %conv22.i.2 = zext i16 %pIn.0.val.2 to i32 |
| %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.020, i32 12 |
| %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.020, i32 10 |
| %add.ptr3.val.2 = load i16, ptr %add.ptr3.2, align 2 |
| %13 = getelementptr i16, ptr %pIn.020, i32 11 |
| %add.ptr3.val14.2 = load i16, ptr %13, align 2 |
| %conv.i15.2 = sext i16 %add.ptr3.val14.2 to i32 |
| %shl.i16.2 = shl nsw i32 %conv.i15.2, 16 |
| %conv22.i17.2 = zext i16 %add.ptr3.val.2 to i32 |
| %shl.2 = shl nuw i32 %conv22.i.2, 16 |
| %shl5.2 = shl nuw i32 %conv22.i17.2, 16 |
| %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 9 |
| store i32 %shl.2, ptr %incdec.ptr9.1, align 4 |
| %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 10 |
| store i32 %shl.i.2, ptr %incdec.ptr.2, align 4 |
| %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 11 |
| store i32 %shl5.2, ptr %incdec.ptr7.2, align 4 |
| %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 12 |
| store i32 %shl.i16.2, ptr %incdec.ptr8.2, align 4 |
| %pIn.0.val.3 = load i16, ptr %add.ptr2.2, align 2 |
| %14 = getelementptr i16, ptr %pIn.020, i32 13 |
| %pIn.0.val13.3 = load i16, ptr %14, align 2 |
| %conv.i.3 = sext i16 %pIn.0.val13.3 to i32 |
| %shl.i.3 = shl nsw i32 %conv.i.3, 16 |
| %conv22.i.3 = zext i16 %pIn.0.val.3 to i32 |
| %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.020, i32 16 |
| %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.020, i32 14 |
| %add.ptr3.val.3 = load i16, ptr %add.ptr3.3, align 2 |
| %15 = getelementptr i16, ptr %pIn.020, i32 15 |
| %add.ptr3.val14.3 = load i16, ptr %15, align 2 |
| %conv.i15.3 = sext i16 %add.ptr3.val14.3 to i32 |
| %shl.i16.3 = shl nsw i32 %conv.i15.3, 16 |
| %conv22.i17.3 = zext i16 %add.ptr3.val.3 to i32 |
| %shl.3 = shl nuw i32 %conv22.i.3, 16 |
| %shl5.3 = shl nuw i32 %conv22.i17.3, 16 |
| %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 13 |
| store i32 %shl.3, ptr %incdec.ptr9.2, align 4 |
| %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 14 |
| store i32 %shl.i.3, ptr %incdec.ptr.3, align 4 |
| %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 15 |
| store i32 %shl5.3, ptr %incdec.ptr7.3, align 4 |
| %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 16 |
| store i32 %shl.i16.3, ptr %incdec.ptr8.3, align 4 |
| %dec.3 = add nsw i32 %blkCnt.021, -4 |
| %cmp.not.3 = icmp eq i32 %dec.3, 0 |
| br i1 %cmp.not.3, label %while.end, label %while.body |
| |
| while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry -- %rem = blockSize & 3 leftover samples remain; they are converted one at a time by the %while.body12* blocks, or skipped entirely when %rem == 0 |
| %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ] |
| %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ] |
| %rem = and i32 %blockSize, 3 |
| %cmp11.not24 = icmp eq i32 %rem, 0 |
| br i1 %cmp11.not24, label %while.end17, label %while.body12 |
| |
| while.body12: ; preds = %while.end -- 1st leftover sample: sign-extend, shift left by 16, store; finished when %rem == 1 |
| %16 = load i16, ptr %pIn.0.lcssa, align 2 |
| %conv = sext i16 %16 to i32 |
| %shl14 = shl nsw i32 %conv, 16 |
| store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4 |
| %cmp11.not = icmp eq i32 %rem, 1 |
| br i1 %cmp11.not, label %while.end17, label %while.body12.1 |
| |
| while.body12.1: ; preds = %while.body12 -- 2nd leftover sample (element 1 past the merged pointers); finished when %rem == 2 |
| %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1 |
| %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1 |
| %17 = load i16, ptr %incdec.ptr13, align 2 |
| %conv.1 = sext i16 %17 to i32 |
| %shl14.1 = shl nsw i32 %conv.1, 16 |
| store i32 %shl14.1, ptr %incdec.ptr15, align 4 |
| %cmp11.not.1 = icmp eq i32 %rem, 2 |
| br i1 %cmp11.not.1, label %while.end17, label %while.body12.2 |
| |
| while.body12.2: ; preds = %while.body12.1 -- 3rd and last leftover sample (element 2 past the merged pointers) |
| %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2 |
| %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2 |
| %18 = load i16, ptr %incdec.ptr13.1, align 2 |
| %conv.2 = sext i16 %18 to i32 |
| %shl14.2 = shl nsw i32 %conv.2, 16 |
| store i32 %shl14.2, ptr %incdec.ptr15.1, align 4 |
| br label %while.end17 |
| |
| while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end -- single return point for every path |
| ret void |
| } |
| |
| define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) { |
| ; CHECK-LABEL: arm_q15_to_q31_altorder: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} |
| ; CHECK-NEXT: push {r4, r5, r6, r7, lr} |
| ; CHECK-NEXT: .pad #8 |
| ; CHECK-NEXT: sub sp, #8 |
| ; CHECK-NEXT: mov r7, r2 |
| ; CHECK-NEXT: lsrs r3, r2, #2 |
| ; CHECK-NEXT: beq .LBB1_6 |
| ; CHECK-NEXT: @ %bb.1: @ %while.body.preheader |
| ; CHECK-NEXT: movs r5, #3 |
| ; CHECK-NEXT: ands r5, r3 |
| ; CHECK-NEXT: subs r2, r3, #1 |
| ; CHECK-NEXT: cbz r5, .LBB1_4 |
| ; CHECK-NEXT: @ %bb.2: @ %while.body.prol |
| ; CHECK-NEXT: str r2, [sp] @ 4-byte Spill |
| ; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: ldrh r7, [r0, #2] |
| ; CHECK-NEXT: ldrh r4, [r0, #4] |
| ; CHECK-NEXT: ldrh r6, [r0, #6] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r7, r7, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: stm r1!, {r2, r7} |
| ; CHECK-NEXT: str r4, [r1] |
| ; CHECK-NEXT: str r6, [r1, #4] |
| ; CHECK-NEXT: subs r1, #8 |
| ; CHECK-NEXT: cmp r5, #1 |
| ; CHECK-NEXT: bne .LBB1_11 |
| ; CHECK-NEXT: @ %bb.3: |
| ; CHECK-NEXT: adds r1, #16 |
| ; CHECK-NEXT: adds r0, #8 |
| ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload |
| ; CHECK-NEXT: mov r3, r2 |
| ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload |
| ; CHECK-NEXT: .LBB1_4: @ %while.body.prol.loopexit |
| ; CHECK-NEXT: cmp r2, #3 |
| ; CHECK-NEXT: blo .LBB1_6 |
| ; CHECK-NEXT: .LBB1_5: @ %while.body |
| ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: ldrh r4, [r0, #2] |
| ; CHECK-NEXT: ldrh r5, [r0, #4] |
| ; CHECK-NEXT: ldrh r6, [r0, #6] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #12] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #8] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #4] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1] |
| ; CHECK-NEXT: ldrh r2, [r0, #8] |
| ; CHECK-NEXT: ldrh r4, [r0, #10] |
| ; CHECK-NEXT: ldrh r5, [r0, #12] |
| ; CHECK-NEXT: ldrh r6, [r0, #14] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #28] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #24] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #20] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #16] |
| ; CHECK-NEXT: ldrh r2, [r0, #16] |
| ; CHECK-NEXT: ldrh r4, [r0, #18] |
| ; CHECK-NEXT: ldrh r5, [r0, #20] |
| ; CHECK-NEXT: ldrh r6, [r0, #22] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #44] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #40] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #36] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #32] |
| ; CHECK-NEXT: ldrh r2, [r0, #24] |
| ; CHECK-NEXT: ldrh r4, [r0, #26] |
| ; CHECK-NEXT: ldrh r5, [r0, #28] |
| ; CHECK-NEXT: ldrh r6, [r0, #30] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: str r6, [r1, #60] |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: str r5, [r1, #56] |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: str r4, [r1, #52] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #48] |
| ; CHECK-NEXT: adds r1, #64 |
| ; CHECK-NEXT: adds r0, #32 |
| ; CHECK-NEXT: subs r3, r3, #4 |
| ; CHECK-NEXT: bne .LBB1_5 |
| ; CHECK-NEXT: .LBB1_6: @ %while.end |
| ; CHECK-NEXT: movs r2, #3 |
| ; CHECK-NEXT: ands r7, r2 |
| ; CHECK-NEXT: beq .LBB1_10 |
| ; CHECK-NEXT: @ %bb.7: @ %while.body12 |
| ; CHECK-NEXT: ldrh r2, [r0] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1] |
| ; CHECK-NEXT: cmp r7, #1 |
| ; CHECK-NEXT: beq .LBB1_10 |
| ; CHECK-NEXT: @ %bb.8: @ %while.body12.1 |
| ; CHECK-NEXT: ldrh r2, [r0, #2] |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #4] |
| ; CHECK-NEXT: cmp r7, #2 |
| ; CHECK-NEXT: beq .LBB1_10 |
| ; CHECK-NEXT: @ %bb.9: @ %while.body12.2 |
| ; CHECK-NEXT: ldrh r0, [r0, #4] |
| ; CHECK-NEXT: lsls r0, r0, #16 |
| ; CHECK-NEXT: str r0, [r1, #8] |
| ; CHECK-NEXT: .LBB1_10: @ %while.end17 |
| ; CHECK-NEXT: add sp, #8 |
| ; CHECK-NEXT: pop {r4, r5, r6, r7, pc} |
| ; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1 |
| ; CHECK-NEXT: ldrh r2, [r0, #8] |
| ; CHECK-NEXT: ldrh r4, [r0, #10] |
| ; CHECK-NEXT: ldrh r6, [r0, #12] |
| ; CHECK-NEXT: ldrh r7, [r0, #14] |
| ; CHECK-NEXT: lsls r7, r7, #16 |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: str r2, [r1, #16] |
| ; CHECK-NEXT: str r4, [r1, #20] |
| ; CHECK-NEXT: str r6, [r1, #24] |
| ; CHECK-NEXT: str r7, [r1, #28] |
| ; CHECK-NEXT: cmp r5, #2 |
| ; CHECK-NEXT: bne .LBB1_13 |
| ; CHECK-NEXT: @ %bb.12: |
| ; CHECK-NEXT: subs r3, r3, #2 |
| ; CHECK-NEXT: adds r1, #32 |
| ; CHECK-NEXT: adds r0, #16 |
| ; CHECK-NEXT: b .LBB1_14 |
| ; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2 |
| ; CHECK-NEXT: ldrh r2, [r0, #16] |
| ; CHECK-NEXT: ldrh r4, [r0, #18] |
| ; CHECK-NEXT: ldrh r5, [r0, #20] |
| ; CHECK-NEXT: ldrh r6, [r0, #22] |
| ; CHECK-NEXT: lsls r6, r6, #16 |
| ; CHECK-NEXT: lsls r5, r5, #16 |
| ; CHECK-NEXT: lsls r4, r4, #16 |
| ; CHECK-NEXT: lsls r2, r2, #16 |
| ; CHECK-NEXT: mov r7, r1 |
| ; CHECK-NEXT: adds r7, #32 |
| ; CHECK-NEXT: stm r7!, {r2, r4, r5, r6} |
| ; CHECK-NEXT: subs r3, r3, #3 |
| ; CHECK-NEXT: adds r1, #48 |
| ; CHECK-NEXT: adds r0, #24 |
| ; CHECK-NEXT: .LBB1_14: @ %while.body.prol.loopexit |
| ; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload |
| ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload |
| ; CHECK-NEXT: cmp r2, #3 |
| ; CHECK-NEXT: bhs .LBB1_5 |
| ; CHECK-NEXT: b .LBB1_6 |
| entry: |
| %cmp.not18 = icmp ult i32 %blockSize, 4 |
| br i1 %cmp.not18, label %while.end, label %while.body.preheader |
| |
| while.body.preheader: ; preds = %entry |
| %shr = lshr i32 %blockSize, 2 |
| %0 = add nsw i32 %shr, -1 |
| %xtraiter = and i32 %shr, 3 |
| %lcmp.mod.not = icmp eq i32 %xtraiter, 0 |
| br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol |
| |
| while.body.prol: ; preds = %while.body.preheader |
| %arrayidx.i.prol = getelementptr inbounds i16, ptr %pSrc, i32 1 |
| %1 = load i16, ptr %arrayidx.i.prol, align 2 |
| %conv.i.prol = sext i16 %1 to i32 |
| %shl.i.prol = shl nsw i32 %conv.i.prol, 16 |
| %2 = load i16, ptr %pSrc, align 2 |
| %conv22.i.prol = zext i16 %2 to i32 |
| %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4 |
| %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2 |
| %arrayidx.i13.prol = getelementptr inbounds i16, ptr %pSrc, i32 3 |
| %3 = load i16, ptr %arrayidx.i13.prol, align 2 |
| %conv.i14.prol = sext i16 %3 to i32 |
| %shl.i15.prol = shl nsw i32 %conv.i14.prol, 16 |
| %4 = load i16, ptr %add.ptr3.prol, align 2 |
| %conv22.i16.prol = zext i16 %4 to i32 |
| %shl.prol = shl nuw i32 %conv22.i.prol, 16 |
| %shl5.prol = shl nuw i32 %conv22.i16.prol, 16 |
| %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1 |
| store i32 %shl.prol, ptr %pDst, align 4 |
| %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2 |
| store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4 |
| %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3 |
| store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4 |
| %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4 |
| store i32 %shl.i15.prol, ptr %incdec.ptr8.prol, align 4 |
| %dec.prol = add nsw i32 %shr, -1 |
| %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1 |
| br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1 |
| |
| while.body.prol.1: ; preds = %while.body.prol |
| %arrayidx.i.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 5 |
| %5 = load i16, ptr %arrayidx.i.prol.1, align 2 |
| %conv.i.prol.1 = sext i16 %5 to i32 |
| %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16 |
| %6 = load i16, ptr %add.ptr2.prol, align 2 |
| %conv22.i.prol.1 = zext i16 %6 to i32 |
| %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8 |
| %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6 |
| %arrayidx.i13.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 7 |
| %7 = load i16, ptr %arrayidx.i13.prol.1, align 2 |
| %conv.i14.prol.1 = sext i16 %7 to i32 |
| %shl.i15.prol.1 = shl nsw i32 %conv.i14.prol.1, 16 |
| %8 = load i16, ptr %add.ptr3.prol.1, align 2 |
| %conv22.i16.prol.1 = zext i16 %8 to i32 |
| %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16 |
| %shl5.prol.1 = shl nuw i32 %conv22.i16.prol.1, 16 |
| %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5 |
| store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4 |
| %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6 |
| store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4 |
| %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7 |
| store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4 |
| %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8 |
| store i32 %shl.i15.prol.1, ptr %incdec.ptr8.prol.1, align 4 |
| %dec.prol.1 = add nsw i32 %shr, -2 |
| %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 |
| br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2 |
| |
| while.body.prol.2: ; preds = %while.body.prol.1 |
| %arrayidx.i.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 9 |
| %9 = load i16, ptr %arrayidx.i.prol.2, align 2 |
| %conv.i.prol.2 = sext i16 %9 to i32 |
| %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16 |
| %10 = load i16, ptr %add.ptr2.prol.1, align 2 |
| %conv22.i.prol.2 = zext i16 %10 to i32 |
| %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12 |
| %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10 |
| %arrayidx.i13.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 11 |
| %11 = load i16, ptr %arrayidx.i13.prol.2, align 2 |
| %conv.i14.prol.2 = sext i16 %11 to i32 |
| %shl.i15.prol.2 = shl nsw i32 %conv.i14.prol.2, 16 |
| %12 = load i16, ptr %add.ptr3.prol.2, align 2 |
| %conv22.i16.prol.2 = zext i16 %12 to i32 |
| %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16 |
| %shl5.prol.2 = shl nuw i32 %conv22.i16.prol.2, 16 |
| %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9 |
| store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4 |
| %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10 |
| store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4 |
| %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11 |
| store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4 |
| %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12 |
| store i32 %shl.i15.prol.2, ptr %incdec.ptr8.prol.2, align 4 |
| %dec.prol.2 = add nsw i32 %shr, -3 |
| br label %while.body.prol.loopexit |
| |
| while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader |
| %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] |
| %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] |
| %pDst.addr.021.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] |
| %blkCnt.020.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ] |
| %pIn.019.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] |
| %13 = icmp ult i32 %0, 3 |
| br i1 %13, label %while.end, label %while.body |
| |
| while.body: ; preds = %while.body.prol.loopexit, %while.body |
| %pDst.addr.021 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.021.unr, %while.body.prol.loopexit ] |
| %blkCnt.020 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.020.unr, %while.body.prol.loopexit ] |
| %pIn.019 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.019.unr, %while.body.prol.loopexit ] |
| %arrayidx.i = getelementptr inbounds i16, ptr %pIn.019, i32 1 |
| %14 = load i16, ptr %arrayidx.i, align 2 |
| %conv.i = sext i16 %14 to i32 |
| %shl.i = shl nsw i32 %conv.i, 16 |
| %15 = load i16, ptr %pIn.019, align 2 |
| %conv22.i = zext i16 %15 to i32 |
| %add.ptr2 = getelementptr inbounds i16, ptr %pIn.019, i32 4 |
| %add.ptr3 = getelementptr inbounds i16, ptr %pIn.019, i32 2 |
| %arrayidx.i13 = getelementptr inbounds i16, ptr %pIn.019, i32 3 |
| %16 = load i16, ptr %arrayidx.i13, align 2 |
| %conv.i14 = sext i16 %16 to i32 |
| %shl.i15 = shl nsw i32 %conv.i14, 16 |
| %17 = load i16, ptr %add.ptr3, align 2 |
| %conv22.i16 = zext i16 %17 to i32 |
| %shl = shl nuw i32 %conv22.i, 16 |
| %shl5 = shl nuw i32 %conv22.i16, 16 |
| %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.021, i32 1 |
| store i32 %shl, ptr %pDst.addr.021, align 4 |
| %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 2 |
| store i32 %shl.i, ptr %incdec.ptr, align 4 |
| %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 3 |
| store i32 %shl5, ptr %incdec.ptr7, align 4 |
| %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 4 |
| store i32 %shl.i15, ptr %incdec.ptr8, align 4 |
| %arrayidx.i.1 = getelementptr inbounds i16, ptr %pIn.019, i32 5 |
| %18 = load i16, ptr %arrayidx.i.1, align 2 |
| %conv.i.1 = sext i16 %18 to i32 |
| %shl.i.1 = shl nsw i32 %conv.i.1, 16 |
| %19 = load i16, ptr %add.ptr2, align 2 |
| %conv22.i.1 = zext i16 %19 to i32 |
| %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.019, i32 8 |
| %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.019, i32 6 |
| %arrayidx.i13.1 = getelementptr inbounds i16, ptr %pIn.019, i32 7 |
| %20 = load i16, ptr %arrayidx.i13.1, align 2 |
| %conv.i14.1 = sext i16 %20 to i32 |
| %shl.i15.1 = shl nsw i32 %conv.i14.1, 16 |
| %21 = load i16, ptr %add.ptr3.1, align 2 |
| %conv22.i16.1 = zext i16 %21 to i32 |
| %shl.1 = shl nuw i32 %conv22.i.1, 16 |
| %shl5.1 = shl nuw i32 %conv22.i16.1, 16 |
| %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 5 |
| store i32 %shl.1, ptr %incdec.ptr9, align 4 |
| %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 6 |
| store i32 %shl.i.1, ptr %incdec.ptr.1, align 4 |
| %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 7 |
| store i32 %shl5.1, ptr %incdec.ptr7.1, align 4 |
| %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 8 |
| store i32 %shl.i15.1, ptr %incdec.ptr8.1, align 4 |
| %arrayidx.i.2 = getelementptr inbounds i16, ptr %pIn.019, i32 9 |
| %22 = load i16, ptr %arrayidx.i.2, align 2 |
| %conv.i.2 = sext i16 %22 to i32 |
| %shl.i.2 = shl nsw i32 %conv.i.2, 16 |
| %23 = load i16, ptr %add.ptr2.1, align 2 |
| %conv22.i.2 = zext i16 %23 to i32 |
| %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.019, i32 12 |
| %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.019, i32 10 |
| %arrayidx.i13.2 = getelementptr inbounds i16, ptr %pIn.019, i32 11 |
| %24 = load i16, ptr %arrayidx.i13.2, align 2 |
| %conv.i14.2 = sext i16 %24 to i32 |
| %shl.i15.2 = shl nsw i32 %conv.i14.2, 16 |
| %25 = load i16, ptr %add.ptr3.2, align 2 |
| %conv22.i16.2 = zext i16 %25 to i32 |
| %shl.2 = shl nuw i32 %conv22.i.2, 16 |
| %shl5.2 = shl nuw i32 %conv22.i16.2, 16 |
| %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 9 |
| store i32 %shl.2, ptr %incdec.ptr9.1, align 4 |
| %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 10 |
| store i32 %shl.i.2, ptr %incdec.ptr.2, align 4 |
| %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 11 |
| store i32 %shl5.2, ptr %incdec.ptr7.2, align 4 |
| %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 12 |
| store i32 %shl.i15.2, ptr %incdec.ptr8.2, align 4 |
| %arrayidx.i.3 = getelementptr inbounds i16, ptr %pIn.019, i32 13 |
| %26 = load i16, ptr %arrayidx.i.3, align 2 |
| %conv.i.3 = sext i16 %26 to i32 |
| %shl.i.3 = shl nsw i32 %conv.i.3, 16 |
| %27 = load i16, ptr %add.ptr2.2, align 2 |
| %conv22.i.3 = zext i16 %27 to i32 |
| %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.019, i32 16 |
| %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.019, i32 14 |
| %arrayidx.i13.3 = getelementptr inbounds i16, ptr %pIn.019, i32 15 |
| %28 = load i16, ptr %arrayidx.i13.3, align 2 |
| %conv.i14.3 = sext i16 %28 to i32 |
| %shl.i15.3 = shl nsw i32 %conv.i14.3, 16 |
| %29 = load i16, ptr %add.ptr3.3, align 2 |
| %conv22.i16.3 = zext i16 %29 to i32 |
| %shl.3 = shl nuw i32 %conv22.i.3, 16 |
| %shl5.3 = shl nuw i32 %conv22.i16.3, 16 |
| %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 13 |
| store i32 %shl.3, ptr %incdec.ptr9.2, align 4 |
| %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 14 |
| store i32 %shl.i.3, ptr %incdec.ptr.3, align 4 |
| %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 15 |
| store i32 %shl5.3, ptr %incdec.ptr7.3, align 4 |
| %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 16 |
| store i32 %shl.i15.3, ptr %incdec.ptr8.3, align 4 |
| %dec.3 = add nsw i32 %blkCnt.020, -4 |
| %cmp.not.3 = icmp eq i32 %dec.3, 0 |
| br i1 %cmp.not.3, label %while.end, label %while.body |
| |
| while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry |
| %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ] |
| %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ] |
| %rem = and i32 %blockSize, 3 |
| %cmp11.not23 = icmp eq i32 %rem, 0 |
| br i1 %cmp11.not23, label %while.end17, label %while.body12 |
| |
| while.body12: ; preds = %while.end |
| %30 = load i16, ptr %pIn.0.lcssa, align 2 |
| %conv = sext i16 %30 to i32 |
| %shl14 = shl nsw i32 %conv, 16 |
| store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4 |
| %cmp11.not = icmp eq i32 %rem, 1 |
| br i1 %cmp11.not, label %while.end17, label %while.body12.1 |
| |
| while.body12.1: ; preds = %while.body12 |
| %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1 |
| %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1 |
| %31 = load i16, ptr %incdec.ptr13, align 2 |
| %conv.1 = sext i16 %31 to i32 |
| %shl14.1 = shl nsw i32 %conv.1, 16 |
| store i32 %shl14.1, ptr %incdec.ptr15, align 4 |
| %cmp11.not.1 = icmp eq i32 %rem, 2 |
| br i1 %cmp11.not.1, label %while.end17, label %while.body12.2 |
| |
| while.body12.2: ; preds = %while.body12.1 |
| %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2 |
| %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2 |
| %32 = load i16, ptr %incdec.ptr13.1, align 2 |
| %conv.2 = sext i16 %32 to i32 |
| %shl14.2 = shl nsw i32 %conv.2, 16 |
| store i32 %shl14.2, ptr %incdec.ptr15.1, align 4 |
| br label %while.end17 |
| |
| while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end |
| ret void |
| } |