| # RUN: llc %s -o - -run-pass=if-converter -debug-only=if-converter 2>%t| FileCheck %s |
| # RUN: FileCheck %s < %t --check-prefix=DEBUG |
| # REQUIRES: asserts |
| |
| # When optimising for size, we use a different set of heuristics for |
| # if-conversion, which take into account the size of the instructions, not the |
| # time taken to execute them. This is more complicated for Thumb, where it is |
| # also affected by selection of narrow branch instructions, insertion of IT |
| # instructions, and selection of the CB(N)Z instructions. |
| |
| # Stub IR module. Each function below only supplies what the MIR function of |
| # the same name needs: the minsize attribute, the "-thumb-mode" target feature |
| # (fn4 and fn7 only), and the IR block names that the MIR blocks and the fn9 |
| # blockaddress operand refer to. Every IR block body is just unreachable. |
| --- | |
| target triple = "thumbv7-unknown-linux-gnueabi" |
| |
| define void @fn1() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn2() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn3() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn4() minsize "target-features"="-thumb-mode" { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn5() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn6() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if2.then: |
| unreachable |
| if2.else: |
| unreachable |
| } |
| |
| define void @fn7() minsize "target-features"="-thumb-mode" { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn8() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| if.end: |
| unreachable |
| } |
| |
| define void @fn9() minsize { |
| entry: |
| unreachable |
| if.then: |
| unreachable |
| if.else: |
| unreachable |
| lab1: |
| unreachable |
| } |
| ... |
| --- |
| name: fn1 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # If-conversion is profitable here because it will remove two branches of 2 |
| # bytes each (assuming they can become narrow branches later), and will only |
| # add 2 bytes with the IT instruction. |
| # |
| # Input shape: bb.0 compares $r2 with 5 and conditionally branches to bb.2; |
| # bb.1 holds one load and an unconditional branch to bb.3; bb.2 holds three |
| # loads and falls through to bb.3. The four loads are the instructions that |
| # end up between the compare and the t2MOVi in the expected output, matching |
| # the NumPredicatedInstructions=4 value expected in the debug output. |
| |
| # CHECK-LABEL: name: fn1 |
| # CHECK: t2CMPri |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRSHi12 |
| # CHECK-NEXT: t2MOVi |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn1' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| t2B %bb.3, 14, $noreg |
| |
| bb.2.if.else: |
| successors: %bb.3(0x80000000) |
| liveins: $r1, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| |
| bb.3.if.end: |
| liveins: $r0, $r3 |
| |
| renamable $r1 = t2MOVi 0, 14, $noreg, $noreg |
| t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn2 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # If-conversion is not profitable here, because the 5 conditional instructions |
| # would require 2 IT instructions. |
| # |
| # Same shape as fn1 except that bb.1 holds two loads instead of one, so the |
| # two arms together contain 2 + 3 = 5 instructions to predicate. The expected |
| # debug output shows the predication overhead (4 bytes) equal to the branch |
| # saving (4 bytes), and the output check requires the t2Bcc to remain directly |
| # after the compare, i.e. the blocks are left unconverted. |
| |
| # CHECK-LABEL: name: fn2 |
| # CHECK: t2CMPri |
| # CHECK-NEXT: t2Bcc |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn2' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=4) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| t2B %bb.3, 14, $noreg |
| |
| bb.2.if.else: |
| successors: %bb.3(0x80000000) |
| liveins: $r1, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| |
| bb.3.if.end: |
| liveins: $r0, $r3 |
| |
| renamable $r1 = t2MOVi 0, 14, $noreg, $noreg |
| t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn3 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # Here, the true and false blocks both end in a tBX_RET instruction. One of |
| # these will be removed, saving 2 bytes, and the remaining one isn't |
| # conditional, so doesn't push us over the limit of 4 instructions in an IT |
| # block. |
| # |
| # Input shape: there is no join block; bb.1 (one load) and bb.2 (three loads) |
| # each return directly and list no successors. The expected output is the |
| # compare, the four loads, and then a single tBX_RET. |
| |
| # CHECK-LABEL: name: fn3 |
| # CHECK: t2CMPri |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRSHi12 |
| # CHECK-NEXT: tBX_RET |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn3' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| bb.2.if.else: |
| liveins: $r1, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn4 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # This is the same as fn2, but compiled for ARM, which doesn't need IT |
| # instructions, so if-conversion is profitable. |
| # |
| # The body uses the ARM-mode opcodes (CMPri, Bcc, B, LDRi12, LDRSH, MOVi, |
| # STRi12, BX_RET) and the IR stub for fn4 carries the "-thumb-mode" target |
| # feature. The expected instruction order (two LDRi12, LDRSH, then two more |
| # LDRi12) is consistent with the bb.2 loads being emitted ahead of the bb.1 |
| # loads. The expected debug output has no extra predication bytes. |
| |
| # CHECK-LABEL: name: fn4 |
| # CHECK: CMPri |
| # CHECK-NEXT: LDRi12 |
| # CHECK-NEXT: LDRi12 |
| # CHECK-NEXT: LDRSH |
| # CHECK-NEXT: LDRi12 |
| # CHECK-NEXT: LDRi12 |
| # CHECK-NEXT: MOVi |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn4' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=8, CommonBytes=0, NumPredicatedInstructions=5, ExtraPredicateBytes=0) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| B %bb.3 |
| |
| bb.2.if.else: |
| successors: %bb.3(0x80000000) |
| liveins: $r1, $r3 |
| |
| renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg |
| |
| bb.3.if.end: |
| liveins: $r0, $r3 |
| |
| renamable $r1 = MOVi 0, 14, $noreg, $noreg |
| STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| BX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn5 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # Here, the compare and conditional branch can be turned into a CBZ, so we |
| # don't want to if-convert. |
| # |
| # Input shape: same two returning arms as fn3, but bb.0 compares $r2 against |
| # 0 rather than 5. The expected debug output credits no bytes for the branch |
| # (BranchBytes=0), and the output check requires both the t2CMPri and the |
| # t2Bcc to still be present in this function. |
| |
| # CHECK-LABEL: name: fn5 |
| # CHECK: t2CMPri |
| # CHECK: t2Bcc |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn5' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=0, CommonBytes=2, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x30000000), %bb.2(0x50000000) |
| liveins: $r0, $r1, $r2 |
| |
| t2CMPri killed renamable $r2, 0, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 1, killed $cpsr |
| |
| bb.1.if.then: |
| liveins: $r0 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| bb.2.if.else: |
| liveins: $r1 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn6 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # This is a forked-diamond pattern, we recognise that the conditional branches |
| # at the ends of the true and false blocks are the same, and can be shared. |
| # |
| # Input shape: bb.1 (three loads) and bb.2 (one load) both end with the same |
| # three-instruction tail: a compare of $r0 against 0, a conditional branch to |
| # bb.3 and an unconditional branch to bb.4. Both arms list bb.3 and bb.4 as |
| # successors with the same probabilities. The expected output keeps a single |
| # copy of the compare and conditional branch after the four loads. |
| |
| # CHECK-LABEL: name: fn6 |
| # CHECK: t2CMPri |
| # CHECK-NEXT: t2LDRSHi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2CMPri |
| # CHECK-NEXT: t2Bcc |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn6' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=12, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x30000000), %bb.2(0x50000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| t2CMPri killed renamable $r2, 4, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 1, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x30000000), %bb.4(0x50000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.3.if2.then, 1, killed $cpsr |
| t2B %bb.4.if2.else, 14, $noreg |
| |
| bb.2.if.else: |
| successors: %bb.3(0x30000000), %bb.4(0x50000000) |
| liveins: $r0, $r1, $r3 |
| |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| t2CMPri renamable $r0, 0, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.3.if2.then, 1, killed $cpsr |
| t2B %bb.4.if2.else, 14, $noreg |
| |
| bb.3.if2.then: |
| liveins: $r0, $r1, $r3 |
| |
| t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| bb.4.if2.else: |
| liveins: $r0 |
| |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn7 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # When compiling for ARM, it would be good for code size to generate very long |
| # runs of conditional instructions, but we put an (arbitrary) limit on this to |
| # avoid generating code which is very bad for performance, and only saves a few |
| # bytes of code size. |
| # |
| # Input shape: ARM-mode variant of the fn4 diamond in which bb.1 holds a chain |
| # of 13 loads (bb.2 still holds three). Only the output is verified for this |
| # function: the Bcc must remain directly after the compare. There is no |
| # debug-output expectation here. |
| |
| # CHECK-LABEL: name: fn7 |
| # CHECK: CMPri |
| # CHECK-NEXT: Bcc |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| B %bb.3 |
| |
| bb.2.if.else: |
| successors: %bb.3(0x80000000) |
| liveins: $r1, $r3 |
| |
| renamable $r0 = LDRi12 killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = LDRSH killed renamable $r0, $noreg, 0, 14, $noreg |
| |
| bb.3.if.end: |
| liveins: $r0, $r3 |
| |
| renamable $r1 = MOVi 0, 14, $noreg, $noreg |
| STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| BX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn8 |
| alignment: 1 |
| tracksRegLiveness: true |
| |
| # The first t2LDRi12 instruction in each branch is the same, so one copy of it |
| # will be removed, and it doesn't need to be predicated, keeping us under the 4 |
| # instruction IT block limit. |
| # |
| # Input shape: unlike fn1 and fn2, both arms load through $r0 (bb.2 lists $r0 |
| # rather than $r1 as a live-in), which is what makes their first instructions |
| # identical. The second load in bb.1 uses offset 4 while the one in bb.2 uses |
| # offset 0, so only the first load is common. That leaves 1 + 3 = 4 |
| # instructions to predicate, matching the expected debug output. |
| |
| # CHECK-LABEL: name: fn8 |
| # CHECK: t2CMPri |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRi12 |
| # CHECK-NEXT: t2LDRSHi12 |
| # CHECK-NEXT: t2MOVi |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn8' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=4, CommonBytes=4, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x40000000), %bb.2(0x40000000) |
| liveins: $r0, $r1, $r2, $r3 |
| |
| t2CMPri killed renamable $r2, 5, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.2, 11, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 4, 14, $noreg |
| t2B %bb.3, 14, $noreg |
| |
| bb.2.if.else: |
| successors: %bb.3(0x80000000) |
| liveins: $r0, $r3 |
| |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRi12 killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| |
| bb.3.if.end: |
| liveins: $r0, $r3 |
| |
| renamable $r1 = t2MOVi 0, 14, $noreg, $noreg |
| t2STRi12 killed renamable $r1, killed renamable $r3, 0, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| |
| --- |
| name: fn9 |
| alignment: 2 |
| tracksRegLiveness: true |
| |
| # The INLINEASM_BR instructions aren't analyzable, but they are identical so we |
| # can still do diamond if-conversion. From a code-size POV, they are common |
| # instructions, so one will be removed, and they don't need an IT block slot. |
| # |
| # Input shape: bb.1 (one load) and bb.3 (three loads) each end with the same |
| # INLINEASM_BR, whose operand is the block address of the IR block lab1, and |
| # both list bb.5 as their only successor. This function differs from the |
| # others in using alignment 2 and the tCMPi8 / tLDRi opcodes. The MIR block |
| # numbers are not contiguous (bb.0, bb.1, bb.3, bb.5). The expected output |
| # keeps a single INLINEASM_BR after the four loads. |
| |
| # CHECK-LABEL: name: fn9 |
| # CHECK: tCMPi8 |
| # CHECK-NEXT: tLDRi |
| # CHECK-NEXT: tLDRi |
| # CHECK-NEXT: tLDRi |
| # CHECK-NEXT: t2LDRSHi12 |
| # CHECK-NEXT: INLINEASM_BR |
| |
| # DEBUG-LABEL: Ifcvt: function ({{[0-9]+}}) 'fn9' |
| # DEBUG: MeetIfcvtSizeLimit(BranchBytes=2, CommonBytes=6, NumPredicatedInstructions=4, ExtraPredicateBytes=2) |
| |
| body: | |
| bb.0.entry: |
| successors: %bb.1(0x30000000), %bb.3(0x50000000) |
| liveins: $r0, $r1, $r2 |
| |
| tCMPi8 killed renamable $r2, 42, 14, $noreg, implicit-def $cpsr |
| t2Bcc %bb.3, 1, killed $cpsr |
| |
| bb.1.if.then: |
| successors: %bb.5(0x7fffffff) |
| liveins: $r0 |
| |
| renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg |
| INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) |
| |
| bb.3.if.else: |
| successors: %bb.5(0x7fffffff) |
| liveins: $r1 |
| |
| renamable $r0 = tLDRi killed renamable $r1, 0, 14, $noreg |
| renamable $r0 = tLDRi killed renamable $r0, 0, 14, $noreg |
| renamable $r0 = t2LDRSHi12 killed renamable $r0, 0, 14, $noreg |
| INLINEASM_BR &"b ${0:l}", 1, 13, blockaddress(@fn9, %ir-block.lab1) |
| |
| bb.5.lab1 (address-taken): |
| liveins: $r0 |
| |
| renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 5, 14, $noreg |
| tBX_RET 14, $noreg, implicit $r0 |
| ... |