; blob: fdfbf3393098e4a1e0423388504ac16e1a866556 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8m.base-arm-none-eabi | FileCheck %s
; arm_q15_to_q31: reads %blockSize i16 values from %pSrc, widens each one to
; i32, shifts it left by 16 and stores the i32 result to %pDst.
;
; The IR is already in unrolled form: elements are handled in blocks of four
; (%blockSize >> 2 blocks), up to three blocks are peeled into the
; while.body.prol* blocks, the remaining blocks run four at a time in
; while.body, and the last %blockSize & 3 elements are handled one at a time
; in the while.body12* blocks.
;
; Within a block of four, elements 0 and 2 are zero-extended and elements 1
; and 3 are sign-extended; because the value is then shifted left by 16, the
; stored 32-bit result is the same for either extension.
;
; The expected-output lines directly below the signature are autogenerated
; (see the NOTE at the top of the file); regenerate them with the script named
; there instead of editing them by hand.
define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: arm_q15_to_q31:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: lsrs r3, r2, #2
; CHECK-NEXT: beq .LBB0_6
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: movs r5, #3
; CHECK-NEXT: ands r5, r3
; CHECK-NEXT: subs r2, r3, #1
; CHECK-NEXT: cbz r5, .LBB0_4
; CHECK-NEXT: @ %bb.2: @ %while.body.prol
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: ldrh r7, [r0, #2]
; CHECK-NEXT: ldrh r4, [r0, #4]
; CHECK-NEXT: ldrh r6, [r0, #6]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: stm r1!, {r2, r7}
; CHECK-NEXT: str r4, [r1]
; CHECK-NEXT: str r6, [r1, #4]
; CHECK-NEXT: subs r1, #8
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: bne .LBB0_11
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: adds r1, #16
; CHECK-NEXT: adds r0, #8
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: .LBB0_4: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r2, #3
; CHECK-NEXT: blo .LBB0_6
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: ldrh r4, [r0, #2]
; CHECK-NEXT: ldrh r5, [r0, #4]
; CHECK-NEXT: ldrh r6, [r0, #6]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #12]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #8]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #4]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: ldrh r4, [r0, #10]
; CHECK-NEXT: ldrh r5, [r0, #12]
; CHECK-NEXT: ldrh r6, [r0, #14]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #28]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #24]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #20]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #16]
; CHECK-NEXT: ldrh r2, [r0, #16]
; CHECK-NEXT: ldrh r4, [r0, #18]
; CHECK-NEXT: ldrh r5, [r0, #20]
; CHECK-NEXT: ldrh r6, [r0, #22]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #44]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #40]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #36]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #32]
; CHECK-NEXT: ldrh r2, [r0, #24]
; CHECK-NEXT: ldrh r4, [r0, #26]
; CHECK-NEXT: ldrh r5, [r0, #28]
; CHECK-NEXT: ldrh r6, [r0, #30]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #60]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #56]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #52]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
; CHECK-NEXT: adds r0, #32
; CHECK-NEXT: subs r3, r3, #4
; CHECK-NEXT: bne .LBB0_5
; CHECK-NEXT: .LBB0_6: @ %while.end
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: ands r7, r2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.7: @ %while.body12
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %while.body12.1
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #4]
; CHECK-NEXT: cmp r7, #2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.9: @ %while.body12.2
; CHECK-NEXT: ldrh r0, [r0, #4]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: str r0, [r1, #8]
; CHECK-NEXT: .LBB0_10: @ %while.end17
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .LBB0_11: @ %while.body.prol.1
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: ldrh r4, [r0, #10]
; CHECK-NEXT: ldrh r6, [r0, #12]
; CHECK-NEXT: ldrh r7, [r0, #14]
; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #16]
; CHECK-NEXT: str r4, [r1, #20]
; CHECK-NEXT: str r6, [r1, #24]
; CHECK-NEXT: str r7, [r1, #28]
; CHECK-NEXT: cmp r5, #2
; CHECK-NEXT: bne .LBB0_13
; CHECK-NEXT: @ %bb.12:
; CHECK-NEXT: subs r3, r3, #2
; CHECK-NEXT: adds r1, #32
; CHECK-NEXT: adds r0, #16
; CHECK-NEXT: b .LBB0_14
; CHECK-NEXT: .LBB0_13: @ %while.body.prol.2
; CHECK-NEXT: ldrh r2, [r0, #16]
; CHECK-NEXT: ldrh r4, [r0, #18]
; CHECK-NEXT: ldrh r5, [r0, #20]
; CHECK-NEXT: ldrh r6, [r0, #22]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: adds r7, #32
; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
; CHECK-NEXT: subs r3, r3, #3
; CHECK-NEXT: adds r1, #48
; CHECK-NEXT: adds r0, #24
; CHECK-NEXT: .LBB0_14: @ %while.body.prol.loopexit
; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: cmp r2, #3
; CHECK-NEXT: bhs .LBB0_5
; CHECK-NEXT: b .LBB0_6
; With fewer than four elements there is no full block: go straight to the
; per-element tail at while.end.
entry:
%cmp.not19 = icmp ult i32 %blockSize, 4
br i1 %cmp.not19, label %while.end, label %while.body.preheader
; %shr      = number of four-element blocks (at least 1 on this path).
; %0        = %shr - 1, tested in while.body.prol.loopexit to decide whether
;             the four-blocks-per-iteration loop runs at all.
; %xtraiter = %shr & 3, the number of blocks (0..3) peeled off before that loop.
while.body.preheader: ; preds = %entry
%shr = lshr i32 %blockSize, 2
%0 = add nsw i32 %shr, -1
%xtraiter = and i32 %shr, 3
%lcmp.mod.not = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
; First peeled block: converts pSrc[0..3] into pDst[0..3]. The second element
; is addressed with a byte-based GEP (i8, offset 2), which is the same address
; as i16 index 1.
while.body.prol: ; preds = %while.body.preheader
%pIn.0.val.prol = load i16, ptr %pSrc, align 2
%1 = getelementptr i8, ptr %pSrc, i32 2
%pIn.0.val13.prol = load i16, ptr %1, align 2
%conv.i.prol = sext i16 %pIn.0.val13.prol to i32
%shl.i.prol = shl nsw i32 %conv.i.prol, 16
%conv22.i.prol = zext i16 %pIn.0.val.prol to i32
%add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
%add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
%add.ptr3.val.prol = load i16, ptr %add.ptr3.prol, align 2
%2 = getelementptr i16, ptr %pSrc, i32 3
%add.ptr3.val14.prol = load i16, ptr %2, align 2
%conv.i15.prol = sext i16 %add.ptr3.val14.prol to i32
%shl.i16.prol = shl nsw i32 %conv.i15.prol, 16
%conv22.i17.prol = zext i16 %add.ptr3.val.prol to i32
%shl.prol = shl nuw i32 %conv22.i.prol, 16
%shl5.prol = shl nuw i32 %conv22.i17.prol, 16
%incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
store i32 %shl.prol, ptr %pDst, align 4
%incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
%incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
%incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
store i32 %shl.i16.prol, ptr %incdec.ptr8.prol, align 4
%dec.prol = add nsw i32 %shr, -1
%prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
; Second peeled block: converts pSrc[4..7] into pDst[4..7].
while.body.prol.1: ; preds = %while.body.prol
%pIn.0.val.prol.1 = load i16, ptr %add.ptr2.prol, align 2
%3 = getelementptr i16, ptr %pSrc, i32 5
%pIn.0.val13.prol.1 = load i16, ptr %3, align 2
%conv.i.prol.1 = sext i16 %pIn.0.val13.prol.1 to i32
%shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
%conv22.i.prol.1 = zext i16 %pIn.0.val.prol.1 to i32
%add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
%add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
%add.ptr3.val.prol.1 = load i16, ptr %add.ptr3.prol.1, align 2
%4 = getelementptr i16, ptr %pSrc, i32 7
%add.ptr3.val14.prol.1 = load i16, ptr %4, align 2
%conv.i15.prol.1 = sext i16 %add.ptr3.val14.prol.1 to i32
%shl.i16.prol.1 = shl nsw i32 %conv.i15.prol.1, 16
%conv22.i17.prol.1 = zext i16 %add.ptr3.val.prol.1 to i32
%shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
%shl5.prol.1 = shl nuw i32 %conv22.i17.prol.1, 16
%incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
%incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
%incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
%incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
store i32 %shl.i16.prol.1, ptr %incdec.ptr8.prol.1, align 4
%dec.prol.1 = add nsw i32 %shr, -2
%prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
; Third peeled block: converts pSrc[8..11] into pDst[8..11].
while.body.prol.2: ; preds = %while.body.prol.1
%pIn.0.val.prol.2 = load i16, ptr %add.ptr2.prol.1, align 2
%5 = getelementptr i16, ptr %pSrc, i32 9
%pIn.0.val13.prol.2 = load i16, ptr %5, align 2
%conv.i.prol.2 = sext i16 %pIn.0.val13.prol.2 to i32
%shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
%conv22.i.prol.2 = zext i16 %pIn.0.val.prol.2 to i32
%add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
%add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
%add.ptr3.val.prol.2 = load i16, ptr %add.ptr3.prol.2, align 2
%6 = getelementptr i16, ptr %pSrc, i32 11
%add.ptr3.val14.prol.2 = load i16, ptr %6, align 2
%conv.i15.prol.2 = sext i16 %add.ptr3.val14.prol.2 to i32
%shl.i16.prol.2 = shl nsw i32 %conv.i15.prol.2, 16
%conv22.i17.prol.2 = zext i16 %add.ptr3.val.prol.2 to i32
%shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
%shl5.prol.2 = shl nuw i32 %conv22.i17.prol.2, 16
%incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
%incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
%incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
%incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
store i32 %shl.i16.prol.2, ptr %incdec.ptr8.prol.2, align 4
%dec.prol.2 = add nsw i32 %shr, -3
br label %while.body.prol.loopexit
; Join point after zero to three peeled blocks. The *.unr phis carry the
; advanced source/destination pointers and the remaining block count into
; while.body; the *.lcssa.unr phis carry the pointers to while.end when the
; unrolled loop is skipped (%0 < 3, i.e. fewer than four blocks in total).
; The undef incoming values on the preheader edge are only consumed via the
; branch to while.end, which would need %xtraiter == 0 together with
; %shr - 1 < 3; with %shr >= 1 those two conditions cannot both hold.
while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
%add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
%incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
%pDst.addr.022.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
%blkCnt.021.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
%pIn.020.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
%7 = icmp ult i32 %0, 3
br i1 %7, label %while.end, label %while.body
; Main loop: every iteration converts 16 elements (four blocks of four),
; advances %pIn.020 by 16 i16 elements and %pDst.addr.022 by 16 i32 elements,
; and subtracts 4 from the block counter; it exits when the counter reaches 0.
while.body: ; preds = %while.body.prol.loopexit, %while.body
%pDst.addr.022 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.022.unr, %while.body.prol.loopexit ]
%blkCnt.021 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.021.unr, %while.body.prol.loopexit ]
%pIn.020 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.020.unr, %while.body.prol.loopexit ]
; Block 0 of the iteration: elements 0..3.
%pIn.0.val = load i16, ptr %pIn.020, align 2
%8 = getelementptr i8, ptr %pIn.020, i32 2
%pIn.0.val13 = load i16, ptr %8, align 2
%conv.i = sext i16 %pIn.0.val13 to i32
%shl.i = shl nsw i32 %conv.i, 16
%conv22.i = zext i16 %pIn.0.val to i32
%add.ptr2 = getelementptr inbounds i16, ptr %pIn.020, i32 4
%add.ptr3 = getelementptr inbounds i16, ptr %pIn.020, i32 2
%add.ptr3.val = load i16, ptr %add.ptr3, align 2
%9 = getelementptr i16, ptr %pIn.020, i32 3
%add.ptr3.val14 = load i16, ptr %9, align 2
%conv.i15 = sext i16 %add.ptr3.val14 to i32
%shl.i16 = shl nsw i32 %conv.i15, 16
%conv22.i17 = zext i16 %add.ptr3.val to i32
%shl = shl nuw i32 %conv22.i, 16
%shl5 = shl nuw i32 %conv22.i17, 16
%incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.022, i32 1
store i32 %shl, ptr %pDst.addr.022, align 4
%incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 2
store i32 %shl.i, ptr %incdec.ptr, align 4
%incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 3
store i32 %shl5, ptr %incdec.ptr7, align 4
%incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 4
store i32 %shl.i16, ptr %incdec.ptr8, align 4
; Block 1 of the iteration: elements 4..7.
%pIn.0.val.1 = load i16, ptr %add.ptr2, align 2
%10 = getelementptr i16, ptr %pIn.020, i32 5
%pIn.0.val13.1 = load i16, ptr %10, align 2
%conv.i.1 = sext i16 %pIn.0.val13.1 to i32
%shl.i.1 = shl nsw i32 %conv.i.1, 16
%conv22.i.1 = zext i16 %pIn.0.val.1 to i32
%add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.020, i32 8
%add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.020, i32 6
%add.ptr3.val.1 = load i16, ptr %add.ptr3.1, align 2
%11 = getelementptr i16, ptr %pIn.020, i32 7
%add.ptr3.val14.1 = load i16, ptr %11, align 2
%conv.i15.1 = sext i16 %add.ptr3.val14.1 to i32
%shl.i16.1 = shl nsw i32 %conv.i15.1, 16
%conv22.i17.1 = zext i16 %add.ptr3.val.1 to i32
%shl.1 = shl nuw i32 %conv22.i.1, 16
%shl5.1 = shl nuw i32 %conv22.i17.1, 16
%incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 5
store i32 %shl.1, ptr %incdec.ptr9, align 4
%incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 6
store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
%incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 7
store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
%incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 8
store i32 %shl.i16.1, ptr %incdec.ptr8.1, align 4
; Block 2 of the iteration: elements 8..11.
%pIn.0.val.2 = load i16, ptr %add.ptr2.1, align 2
%12 = getelementptr i16, ptr %pIn.020, i32 9
%pIn.0.val13.2 = load i16, ptr %12, align 2
%conv.i.2 = sext i16 %pIn.0.val13.2 to i32
%shl.i.2 = shl nsw i32 %conv.i.2, 16
%conv22.i.2 = zext i16 %pIn.0.val.2 to i32
%add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.020, i32 12
%add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.020, i32 10
%add.ptr3.val.2 = load i16, ptr %add.ptr3.2, align 2
%13 = getelementptr i16, ptr %pIn.020, i32 11
%add.ptr3.val14.2 = load i16, ptr %13, align 2
%conv.i15.2 = sext i16 %add.ptr3.val14.2 to i32
%shl.i16.2 = shl nsw i32 %conv.i15.2, 16
%conv22.i17.2 = zext i16 %add.ptr3.val.2 to i32
%shl.2 = shl nuw i32 %conv22.i.2, 16
%shl5.2 = shl nuw i32 %conv22.i17.2, 16
%incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 9
store i32 %shl.2, ptr %incdec.ptr9.1, align 4
%incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 10
store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
%incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 11
store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
%incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 12
store i32 %shl.i16.2, ptr %incdec.ptr8.2, align 4
; Block 3 of the iteration: elements 12..15.
%pIn.0.val.3 = load i16, ptr %add.ptr2.2, align 2
%14 = getelementptr i16, ptr %pIn.020, i32 13
%pIn.0.val13.3 = load i16, ptr %14, align 2
%conv.i.3 = sext i16 %pIn.0.val13.3 to i32
%shl.i.3 = shl nsw i32 %conv.i.3, 16
%conv22.i.3 = zext i16 %pIn.0.val.3 to i32
%add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.020, i32 16
%add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.020, i32 14
%add.ptr3.val.3 = load i16, ptr %add.ptr3.3, align 2
%15 = getelementptr i16, ptr %pIn.020, i32 15
%add.ptr3.val14.3 = load i16, ptr %15, align 2
%conv.i15.3 = sext i16 %add.ptr3.val14.3 to i32
%shl.i16.3 = shl nsw i32 %conv.i15.3, 16
%conv22.i17.3 = zext i16 %add.ptr3.val.3 to i32
%shl.3 = shl nuw i32 %conv22.i.3, 16
%shl5.3 = shl nuw i32 %conv22.i17.3, 16
%incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 13
store i32 %shl.3, ptr %incdec.ptr9.2, align 4
%incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 14
store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
%incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 15
store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
%incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 16
store i32 %shl.i16.3, ptr %incdec.ptr8.3, align 4
%dec.3 = add nsw i32 %blkCnt.021, -4
%cmp.not.3 = icmp eq i32 %dec.3, 0
br i1 %cmp.not.3, label %while.end, label %while.body
; Tail: %rem = %blockSize & 3 leftover elements. The phis select the source
; and destination positions reached by whichever path got here. The leftover
; loop is fully unrolled into while.body12, .1 and .2, one element each, with
; an early exit to while.end17 after %rem elements.
while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry
%pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
%pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
%rem = and i32 %blockSize, 3
%cmp11.not24 = icmp eq i32 %rem, 0
br i1 %cmp11.not24, label %while.end17, label %while.body12
; Leftover element 0 (sign-extended, then shifted left by 16).
while.body12: ; preds = %while.end
%16 = load i16, ptr %pIn.0.lcssa, align 2
%conv = sext i16 %16 to i32
%shl14 = shl nsw i32 %conv, 16
store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
%cmp11.not = icmp eq i32 %rem, 1
br i1 %cmp11.not, label %while.end17, label %while.body12.1
; Leftover element 1.
while.body12.1: ; preds = %while.body12
%incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
%incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
%17 = load i16, ptr %incdec.ptr13, align 2
%conv.1 = sext i16 %17 to i32
%shl14.1 = shl nsw i32 %conv.1, 16
store i32 %shl14.1, ptr %incdec.ptr15, align 4
%cmp11.not.1 = icmp eq i32 %rem, 2
br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
; Leftover element 2 (only reached when %rem == 3).
while.body12.2: ; preds = %while.body12.1
%incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
%incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
%18 = load i16, ptr %incdec.ptr13.1, align 2
%conv.2 = sext i16 %18 to i32
%shl14.2 = shl nsw i32 %conv.2, 16
store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
br label %while.end17
while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
ret void
}
define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: arm_q15_to_q31_altorder:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: lsrs r3, r2, #2
; CHECK-NEXT: beq .LBB1_6
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: movs r5, #3
; CHECK-NEXT: ands r5, r3
; CHECK-NEXT: subs r2, r3, #1
; CHECK-NEXT: cbz r5, .LBB1_4
; CHECK-NEXT: @ %bb.2: @ %while.body.prol
; CHECK-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: ldrh r7, [r0, #2]
; CHECK-NEXT: ldrh r4, [r0, #4]
; CHECK-NEXT: ldrh r6, [r0, #6]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: stm r1!, {r2, r7}
; CHECK-NEXT: str r4, [r1]
; CHECK-NEXT: str r6, [r1, #4]
; CHECK-NEXT: subs r1, #8
; CHECK-NEXT: cmp r5, #1
; CHECK-NEXT: bne .LBB1_11
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: adds r1, #16
; CHECK-NEXT: adds r0, #8
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: .LBB1_4: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r2, #3
; CHECK-NEXT: blo .LBB1_6
; CHECK-NEXT: .LBB1_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: ldrh r4, [r0, #2]
; CHECK-NEXT: ldrh r5, [r0, #4]
; CHECK-NEXT: ldrh r6, [r0, #6]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #12]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #8]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #4]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: ldrh r4, [r0, #10]
; CHECK-NEXT: ldrh r5, [r0, #12]
; CHECK-NEXT: ldrh r6, [r0, #14]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #28]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #24]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #20]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #16]
; CHECK-NEXT: ldrh r2, [r0, #16]
; CHECK-NEXT: ldrh r4, [r0, #18]
; CHECK-NEXT: ldrh r5, [r0, #20]
; CHECK-NEXT: ldrh r6, [r0, #22]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #44]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #40]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #36]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #32]
; CHECK-NEXT: ldrh r2, [r0, #24]
; CHECK-NEXT: ldrh r4, [r0, #26]
; CHECK-NEXT: ldrh r5, [r0, #28]
; CHECK-NEXT: ldrh r6, [r0, #30]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #60]
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: str r5, [r1, #56]
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: str r4, [r1, #52]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
; CHECK-NEXT: subs r3, r3, #4
; CHECK-NEXT: adds r0, #32
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: bne .LBB1_5
; CHECK-NEXT: .LBB1_6: @ %while.end
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: ands r7, r2
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.7: @ %while.body12
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.8: @ %while.body12.1
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #4]
; CHECK-NEXT: cmp r7, #2
; CHECK-NEXT: beq .LBB1_10
; CHECK-NEXT: @ %bb.9: @ %while.body12.2
; CHECK-NEXT: ldrh r0, [r0, #4]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: str r0, [r1, #8]
; CHECK-NEXT: .LBB1_10: @ %while.end17
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1
; CHECK-NEXT: ldrh r2, [r0, #8]
; CHECK-NEXT: ldrh r4, [r0, #10]
; CHECK-NEXT: ldrh r6, [r0, #12]
; CHECK-NEXT: ldrh r7, [r0, #14]
; CHECK-NEXT: lsls r7, r7, #16
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #16]
; CHECK-NEXT: str r4, [r1, #20]
; CHECK-NEXT: str r6, [r1, #24]
; CHECK-NEXT: str r7, [r1, #28]
; CHECK-NEXT: cmp r5, #2
; CHECK-NEXT: bne .LBB1_13
; CHECK-NEXT: @ %bb.12:
; CHECK-NEXT: subs r3, r3, #2
; CHECK-NEXT: adds r1, #32
; CHECK-NEXT: adds r0, #16
; CHECK-NEXT: b .LBB1_14
; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2
; CHECK-NEXT: ldrh r2, [r0, #16]
; CHECK-NEXT: ldrh r4, [r0, #18]
; CHECK-NEXT: ldrh r5, [r0, #20]
; CHECK-NEXT: ldrh r6, [r0, #22]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: lsls r5, r5, #16
; CHECK-NEXT: lsls r4, r4, #16
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: mov r7, r1
; CHECK-NEXT: adds r7, #32
; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
; CHECK-NEXT: subs r3, r3, #3
; CHECK-NEXT: adds r1, #48
; CHECK-NEXT: adds r0, #24
; CHECK-NEXT: .LBB1_14: @ %while.body.prol.loopexit
; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT: cmp r2, #3
; CHECK-NEXT: bhs .LBB1_5
; CHECK-NEXT: b .LBB1_6
entry:
%cmp.not18 = icmp ult i32 %blockSize, 4
br i1 %cmp.not18, label %while.end, label %while.body.preheader
while.body.preheader: ; preds = %entry
%shr = lshr i32 %blockSize, 2
%0 = add nsw i32 %shr, -1
%xtraiter = and i32 %shr, 3
%lcmp.mod.not = icmp eq i32 %xtraiter, 0
br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
while.body.prol: ; preds = %while.body.preheader
%arrayidx.i.prol = getelementptr inbounds i16, ptr %pSrc, i32 1
%1 = load i16, ptr %arrayidx.i.prol, align 2
%conv.i.prol = sext i16 %1 to i32
%shl.i.prol = shl nsw i32 %conv.i.prol, 16
%2 = load i16, ptr %pSrc, align 2
%conv22.i.prol = zext i16 %2 to i32
%add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
%add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
%arrayidx.i13.prol = getelementptr inbounds i16, ptr %pSrc, i32 3
%3 = load i16, ptr %arrayidx.i13.prol, align 2
%conv.i14.prol = sext i16 %3 to i32
%shl.i15.prol = shl nsw i32 %conv.i14.prol, 16
%4 = load i16, ptr %add.ptr3.prol, align 2
%conv22.i16.prol = zext i16 %4 to i32
%shl.prol = shl nuw i32 %conv22.i.prol, 16
%shl5.prol = shl nuw i32 %conv22.i16.prol, 16
%incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
store i32 %shl.prol, ptr %pDst, align 4
%incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
%incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
%incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
store i32 %shl.i15.prol, ptr %incdec.ptr8.prol, align 4
%dec.prol = add nsw i32 %shr, -1
%prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
while.body.prol.1: ; preds = %while.body.prol
%arrayidx.i.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 5
%5 = load i16, ptr %arrayidx.i.prol.1, align 2
%conv.i.prol.1 = sext i16 %5 to i32
%shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
%6 = load i16, ptr %add.ptr2.prol, align 2
%conv22.i.prol.1 = zext i16 %6 to i32
%add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
%add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
%arrayidx.i13.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 7
%7 = load i16, ptr %arrayidx.i13.prol.1, align 2
%conv.i14.prol.1 = sext i16 %7 to i32
%shl.i15.prol.1 = shl nsw i32 %conv.i14.prol.1, 16
%8 = load i16, ptr %add.ptr3.prol.1, align 2
%conv22.i16.prol.1 = zext i16 %8 to i32
%shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
%shl5.prol.1 = shl nuw i32 %conv22.i16.prol.1, 16
%incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
%incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
%incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
%incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
store i32 %shl.i15.prol.1, ptr %incdec.ptr8.prol.1, align 4
%dec.prol.1 = add nsw i32 %shr, -2
%prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
while.body.prol.2: ; preds = %while.body.prol.1
%arrayidx.i.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 9
%9 = load i16, ptr %arrayidx.i.prol.2, align 2
%conv.i.prol.2 = sext i16 %9 to i32
%shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
%10 = load i16, ptr %add.ptr2.prol.1, align 2
%conv22.i.prol.2 = zext i16 %10 to i32
%add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
%add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
%arrayidx.i13.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 11
%11 = load i16, ptr %arrayidx.i13.prol.2, align 2
%conv.i14.prol.2 = sext i16 %11 to i32
%shl.i15.prol.2 = shl nsw i32 %conv.i14.prol.2, 16
%12 = load i16, ptr %add.ptr3.prol.2, align 2
%conv22.i16.prol.2 = zext i16 %12 to i32
%shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
%shl5.prol.2 = shl nuw i32 %conv22.i16.prol.2, 16
%incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
%incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
%incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
%incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
store i32 %shl.i15.prol.2, ptr %incdec.ptr8.prol.2, align 4
%dec.prol.2 = add nsw i32 %shr, -3
br label %while.body.prol.loopexit
while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
%add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
%incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
%pDst.addr.021.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
%blkCnt.020.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
%pIn.019.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
%13 = icmp ult i32 %0, 3
br i1 %13, label %while.end, label %while.body
while.body: ; preds = %while.body.prol.loopexit, %while.body
%pDst.addr.021 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.021.unr, %while.body.prol.loopexit ]
%blkCnt.020 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.020.unr, %while.body.prol.loopexit ]
%pIn.019 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.019.unr, %while.body.prol.loopexit ]
%arrayidx.i = getelementptr inbounds i16, ptr %pIn.019, i32 1
%14 = load i16, ptr %arrayidx.i, align 2
%conv.i = sext i16 %14 to i32
%shl.i = shl nsw i32 %conv.i, 16
%15 = load i16, ptr %pIn.019, align 2
%conv22.i = zext i16 %15 to i32
%add.ptr2 = getelementptr inbounds i16, ptr %pIn.019, i32 4
%add.ptr3 = getelementptr inbounds i16, ptr %pIn.019, i32 2
%arrayidx.i13 = getelementptr inbounds i16, ptr %pIn.019, i32 3
%16 = load i16, ptr %arrayidx.i13, align 2
%conv.i14 = sext i16 %16 to i32
%shl.i15 = shl nsw i32 %conv.i14, 16
%17 = load i16, ptr %add.ptr3, align 2
%conv22.i16 = zext i16 %17 to i32
%shl = shl nuw i32 %conv22.i, 16
%shl5 = shl nuw i32 %conv22.i16, 16
%incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.021, i32 1
store i32 %shl, ptr %pDst.addr.021, align 4
%incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 2
store i32 %shl.i, ptr %incdec.ptr, align 4
%incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 3
store i32 %shl5, ptr %incdec.ptr7, align 4
%incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 4
store i32 %shl.i15, ptr %incdec.ptr8, align 4
%arrayidx.i.1 = getelementptr inbounds i16, ptr %pIn.019, i32 5
%18 = load i16, ptr %arrayidx.i.1, align 2
%conv.i.1 = sext i16 %18 to i32
%shl.i.1 = shl nsw i32 %conv.i.1, 16
%19 = load i16, ptr %add.ptr2, align 2
%conv22.i.1 = zext i16 %19 to i32
%add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.019, i32 8
%add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.019, i32 6
%arrayidx.i13.1 = getelementptr inbounds i16, ptr %pIn.019, i32 7
%20 = load i16, ptr %arrayidx.i13.1, align 2
%conv.i14.1 = sext i16 %20 to i32
%shl.i15.1 = shl nsw i32 %conv.i14.1, 16
%21 = load i16, ptr %add.ptr3.1, align 2
%conv22.i16.1 = zext i16 %21 to i32
%shl.1 = shl nuw i32 %conv22.i.1, 16
%shl5.1 = shl nuw i32 %conv22.i16.1, 16
%incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 5
store i32 %shl.1, ptr %incdec.ptr9, align 4
%incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 6
store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
%incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 7
store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
%incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 8
store i32 %shl.i15.1, ptr %incdec.ptr8.1, align 4
%arrayidx.i.2 = getelementptr inbounds i16, ptr %pIn.019, i32 9
%22 = load i16, ptr %arrayidx.i.2, align 2
%conv.i.2 = sext i16 %22 to i32
%shl.i.2 = shl nsw i32 %conv.i.2, 16
%23 = load i16, ptr %add.ptr2.1, align 2
%conv22.i.2 = zext i16 %23 to i32
%add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.019, i32 12
%add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.019, i32 10
%arrayidx.i13.2 = getelementptr inbounds i16, ptr %pIn.019, i32 11
%24 = load i16, ptr %arrayidx.i13.2, align 2
%conv.i14.2 = sext i16 %24 to i32
%shl.i15.2 = shl nsw i32 %conv.i14.2, 16
%25 = load i16, ptr %add.ptr3.2, align 2
%conv22.i16.2 = zext i16 %25 to i32
%shl.2 = shl nuw i32 %conv22.i.2, 16
%shl5.2 = shl nuw i32 %conv22.i16.2, 16
%incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 9
store i32 %shl.2, ptr %incdec.ptr9.1, align 4
%incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 10
store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
%incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 11
store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
%incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 12
store i32 %shl.i15.2, ptr %incdec.ptr8.2, align 4
%arrayidx.i.3 = getelementptr inbounds i16, ptr %pIn.019, i32 13
%26 = load i16, ptr %arrayidx.i.3, align 2
%conv.i.3 = sext i16 %26 to i32
%shl.i.3 = shl nsw i32 %conv.i.3, 16
%27 = load i16, ptr %add.ptr2.2, align 2
%conv22.i.3 = zext i16 %27 to i32
%add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.019, i32 16
%add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.019, i32 14
%arrayidx.i13.3 = getelementptr inbounds i16, ptr %pIn.019, i32 15
%28 = load i16, ptr %arrayidx.i13.3, align 2
%conv.i14.3 = sext i16 %28 to i32
%shl.i15.3 = shl nsw i32 %conv.i14.3, 16
%29 = load i16, ptr %add.ptr3.3, align 2
%conv22.i16.3 = zext i16 %29 to i32
%shl.3 = shl nuw i32 %conv22.i.3, 16
%shl5.3 = shl nuw i32 %conv22.i16.3, 16
%incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 13
store i32 %shl.3, ptr %incdec.ptr9.2, align 4
%incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 14
store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
%incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 15
store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
%incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 16
store i32 %shl.i15.3, ptr %incdec.ptr8.3, align 4
%dec.3 = add nsw i32 %blkCnt.020, -4
%cmp.not.3 = icmp eq i32 %dec.3, 0
br i1 %cmp.not.3, label %while.end, label %while.body
while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry
%pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
%pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
%rem = and i32 %blockSize, 3
%cmp11.not23 = icmp eq i32 %rem, 0
br i1 %cmp11.not23, label %while.end17, label %while.body12
while.body12: ; preds = %while.end
%30 = load i16, ptr %pIn.0.lcssa, align 2
%conv = sext i16 %30 to i32
%shl14 = shl nsw i32 %conv, 16
store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
%cmp11.not = icmp eq i32 %rem, 1
br i1 %cmp11.not, label %while.end17, label %while.body12.1
while.body12.1: ; preds = %while.body12
%incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
%incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
%31 = load i16, ptr %incdec.ptr13, align 2
%conv.1 = sext i16 %31 to i32
%shl14.1 = shl nsw i32 %conv.1, 16
store i32 %shl14.1, ptr %incdec.ptr15, align 4
%cmp11.not.1 = icmp eq i32 %rem, 2
br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
while.body12.2: ; preds = %while.body12.1
%incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
%incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
%32 = load i16, ptr %incdec.ptr13.1, align 2
%conv.2 = sext i16 %32 to i32
%shl14.2 = shl nsw i32 %conv.2, 16
store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
br label %while.end17
while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
ret void
}