# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX8 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX9 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass=si-fold-operands %s -o - | FileCheck -check-prefixes=CHECK,GFX10 %s
# COPY of an undef SGPR source: si-fold-operands must handle the undef operand
# without crashing. The checks show the dead S_MOV_B32 of the frame index is
# gone and only the (renumbered) undef COPY survives.
---
name: copy_undef
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: copy_undef
    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY undef %2:sreg_32
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %2:vgpr_32 = COPY undef %1:sreg_32
    SI_RETURN implicit %2
...
# FI materialized by S_MOV_B32, added to an inline constant, result copied to a
# *virtual* VGPR: the whole chain folds to a single VALU add of 128 and the FI
# (carry-writing V_ADD_CO_U32_e32 on GFX8, carry-less V_ADD_U32_e32 on GFX9+).
# The nuw flag is preserved on the folded add.
---
name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = nuw V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = nuw S_ADD_I32 %0, 128, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# Same as the previous test but with the S_ADD_I32 operands commuted
# (constant first, FI-mov second); the fold result is identical.
---
name: fold_s_add_i32__const_copy_mov_fi_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__const_copy_mov_fi_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__const_copy_mov_fi_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__const_copy_mov_fi_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_ADD_I32 128, %0, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# S_ADD_I32 with a *direct* FI operand (no S_MOV_B32) plus imm 64, copied to a
# virtual VGPR: folds to the e64 VALU add form (GFX8 additionally carries a
# dead sreg_64 carry-out def). nuw is preserved.
---
name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = nuw V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__fi_imm_copy_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = nuw S_ADD_I32 %stack.0, 64, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Commuted variant of the previous test (imm first, direct FI second);
# the fold result is identical.
---
name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = nuw V_ADD_CO_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__imm_fi_copy_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = nuw V_ADD_U32_e64 64, %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = nuw S_ADD_I32 64, %stack.0, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Result is copied to a *physical* VGPR: the FI mov is folded into the scalar
# add, but there is no VALU rewrite and the physreg COPY is kept.
---
name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_phys_vgpr
    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def dead $scc
    ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
    ; CHECK-NEXT: SI_RETURN implicit $vgpr0
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def dead $scc
    $vgpr0 = COPY %1
    SI_RETURN implicit $vgpr0
...
# $vcc is live across the sequence: on GFX8 the VALU rewrite would need
# V_ADD_CO_U32_e32, which clobbers $vcc, so the scalar add is kept there;
# GFX9/GFX10 can still fold using the carry-less V_ADD_U32_e32.
---
name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_vcc
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $vcc
    ; GFX8-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_vcc
    ; GFX8: liveins: $vcc
    ; GFX8-NEXT: {{ $}}
    ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def dead $scc
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
    ; GFX8-NEXT: SI_RETURN implicit [[COPY]], implicit $vcc
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_vcc
    ; GFX9: liveins: $vcc
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]], implicit $vcc
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_vcc
    ; GFX10: liveins: $vcc
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]], implicit $vcc
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2, implicit $vcc
...
# The add's $scc def is *not* dead (SI_RETURN uses it): a VALU rewrite would
# lose the SCC def, so only the FI mov is folded and the scalar add is kept
# on all targets.
---
name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_scc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 16384
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_add_i32__mov_fi_const_copy_to_virt_vgpr_live_scc
    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def $scc
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]], implicit $scc
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_ADD_I32 %0, 128, implicit-def $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2, implicit $scc
...
# FI mov added to a *register* operand: folds to the e64 VALU add with the
# SGPR as src0 and the FI as src1 (GFX8 keeps a dead carry-out def).
---
name: fold_s_add_i32__mov_fi_reg_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $sgpr8
    ; GFX8-LABEL: name: fold_s_add_i32__mov_fi_reg_copy_to_virt_vgpr
    ; GFX8: liveins: $sgpr8
    ; GFX8-NEXT: {{ $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__mov_fi_reg_copy_to_virt_vgpr
    ; GFX9: liveins: $sgpr8
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__mov_fi_reg_copy_to_virt_vgpr
    ; GFX10: liveins: $sgpr8
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = COPY $sgpr8
    %1:sreg_32 = S_MOV_B32 %stack.0
    %2:sreg_32 = S_ADD_I32 %0, %1, implicit-def dead $scc
    %3:vgpr_32 = COPY %2
    SI_RETURN implicit %3
...
# Commuted variant of the previous test (FI mov first, register second);
# the fold result is identical.
---
name: fold_s_add_i32__reg_copy_mov_fi_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $sgpr8
    ; GFX8-LABEL: name: fold_s_add_i32__reg_copy_mov_fi_to_virt_vgpr
    ; GFX8: liveins: $sgpr8
    ; GFX8-NEXT: {{ $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__reg_copy_mov_fi_to_virt_vgpr
    ; GFX9: liveins: $sgpr8
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__reg_copy_mov_fi_to_virt_vgpr
    ; GFX10: liveins: $sgpr8
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = COPY $sgpr8
    %1:sreg_32 = S_MOV_B32 %stack.0
    %2:sreg_32 = S_ADD_I32 %1, %0, implicit-def dead $scc
    %3:vgpr_32 = COPY %2
    SI_RETURN implicit %3
...
# Add of *two* frame indexes: only one FI can become a VALU operand, so no
# fold is expected and the scalar add plus VGPR copy must survive.
# Fix: the original input had a self-copy (%1 = COPY %1), which left the add
# dead and meant the test never exercised the fi+fi case; the copy must read
# the add result %0. CHECK lines updated to match (regenerate with
# update_mir_test_checks.py to confirm exact output).
---
name: fold_s_add_i32__fi_fi_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
  - { id: 1, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_add_i32__fi_fi_copy_to_virt_vgpr
    ; CHECK: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, %stack.1, implicit-def dead $scc
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]]
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]]
    %0:sreg_32 = S_ADD_I32 %stack.0, %stack.1, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Direct FI operand plus inline constant 128, copied to a virtual VGPR:
# folds to the e32 VALU add (carry-writing on GFX8, carry-less on GFX9+).
---
name: fold_s_add_i32__fi_const_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__fi_const_copy_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__fi_const_copy_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__fi_const_copy_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    %0:sreg_32 = S_ADD_I32 %stack.0, 128, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Commuted variant of the previous test (constant first, direct FI second);
# the fold result is identical.
---
name: fold_s_add_i32__const_fi_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; GFX8-LABEL: name: fold_s_add_i32__const_fi_copy_to_virt_vgpr
    ; GFX8: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 128, %stack.0, implicit-def dead $vcc, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e32_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__const_fi_copy_to_virt_vgpr
    ; GFX9: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__const_fi_copy_to_virt_vgpr
    ; GFX10: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, %stack.0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e32_]]
    %0:sreg_32 = S_ADD_I32 128, %stack.0, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Direct FI operand plus a register operand: folds to the e64 VALU add with
# the SGPR as src0 and the FI as src1 (dead carry-out on GFX8).
---
name: fold_s_add_i32__fi_reg_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $sgpr8
    ; GFX8-LABEL: name: fold_s_add_i32__fi_reg_copy_to_virt_vgpr
    ; GFX8: liveins: $sgpr8
    ; GFX8-NEXT: {{ $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__fi_reg_copy_to_virt_vgpr
    ; GFX9: liveins: $sgpr8
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__fi_reg_copy_to_virt_vgpr
    ; GFX10: liveins: $sgpr8
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = COPY $sgpr8
    %1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# Commuted variant of the previous test (register first, direct FI second);
# the fold result is identical.
---
name: fold_s_add_i32__reg_fi_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $sgpr8
    ; GFX8-LABEL: name: fold_s_add_i32__reg_fi_copy_to_virt_vgpr
    ; GFX8: liveins: $sgpr8
    ; GFX8-NEXT: {{ $}}
    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX8-NEXT: SI_RETURN implicit [[V_ADD_CO_U32_e64_]]
    ;
    ; GFX9-LABEL: name: fold_s_add_i32__reg_fi_copy_to_virt_vgpr
    ; GFX9: liveins: $sgpr8
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX9-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    ;
    ; GFX10-LABEL: name: fold_s_add_i32__reg_fi_copy_to_virt_vgpr
    ; GFX10: liveins: $sgpr8
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr8
    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], %stack.0, 0, implicit $exec
    ; GFX10-NEXT: SI_RETURN implicit [[V_ADD_U32_e64_]]
    %0:sreg_32 = COPY $sgpr8
    %1:sreg_32 = S_ADD_I32 %0, %stack.0, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# S_OR_B32 fed by an FI mov: per the checks, the FI is *not* folded through
# the S_MOV_B32 here and the scalar OR plus copy survive unchanged
# (contrast with the direct-FI S_OR_B32 tests below, which do fold).
---
name: fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_or_b32__mov_fi_const_copy_to_virt_vgpr
    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
    ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_OR_B32 %0, 128, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# Commuted variant of the previous test (constant first, FI mov second);
# likewise no fold — the scalar sequence is kept as-is.
---
name: fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_or_b32__const_copy_mov_fi_to_virt_vgpr
    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
    ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 128, [[S_MOV_B32_]], implicit-def dead $scc
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_OR_B32_]]
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_OR_B32 128, %0, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# S_OR_B32 with a *direct* FI operand and imm 64: folds to V_OR_B32_e64,
# preserving the disjoint flag.
---
name: fold_s_or_b32__fi_imm_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_or_b32__fi_imm_copy_to_virt_vgpr
    ; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
    %0:sreg_32 = disjoint S_OR_B32 %stack.0, 64, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# Commuted variant of the previous test (imm first, direct FI second);
# the fold result is identical.
---
name: fold_s_or_b32__imm_fi_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_or_b32__imm_fi_copy_to_virt_vgpr
    ; CHECK: [[V_OR_B32_e64_:%[0-9]+]]:vgpr_32 = disjoint V_OR_B32_e64 64, %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_OR_B32_e64_]]
    %0:sreg_32 = disjoint S_OR_B32 64, %stack.0, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# S_AND_B32 with a direct FI operand and imm 64: folds to V_AND_B32_e64.
---
name: fold_s_and_b32__fi_imm_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_and_b32__fi_imm_copy_to_virt_vgpr
    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 64, %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e64_]]
    %0:sreg_32 = S_AND_B32 %stack.0, 64, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# S_AND_B32 with a direct FI operand and constant 128: folds to the e32
# VALU form.
---
name: fold_s_and_b32__fi_const_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_and_b32__fi_const_copy_to_virt_vgpr
    ; CHECK: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 128, %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_AND_B32_e32_]]
    %0:sreg_32 = S_AND_B32 %stack.0, 128, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# S_MUL_I32 with a direct FI operand and imm 64: folds to V_MUL_LO_U32_e64.
---
name: fold_s_mul_i32__fi_imm_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_mul_i32__fi_imm_copy_to_virt_vgpr
    ; CHECK: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 64, %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_MUL_LO_U32_e64_]]
    %0:sreg_32 = S_MUL_I32 %stack.0, 64, implicit-def dead $scc
    %1:vgpr_32 = COPY %0
    SI_RETURN implicit %1
...
# S_MUL_I32 with a direct FI operand and a register operand: folds to
# V_MUL_LO_U32_e64 with the SGPR as src0 and the FI as src1.
---
name: fold_s_mul_i32__fi_reg_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    liveins: $sgpr4
    ; CHECK-LABEL: name: fold_s_mul_i32__fi_reg_copy_to_virt_vgpr
    ; CHECK: liveins: $sgpr4
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
    ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[COPY]], %stack.0, implicit $exec
    ; CHECK-NEXT: SI_RETURN implicit [[V_MUL_LO_U32_e64_]]
    %0:sreg_32 = COPY $sgpr4
    %1:sreg_32 = S_MUL_I32 %stack.0, %0, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# S_AND_B32 fed by an FI mov: per the checks, the FI is *not* folded through
# the S_MOV_B32 and the scalar AND plus copy are kept (contrast with the
# direct-FI S_AND_B32 tests above, which do fold).
---
name: fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
tracksRegLiveness: true
stack:
  - { id: 0, size: 16384, alignment: 4, local-offset: 0 }
body: |
  bb.0:
    ; CHECK-LABEL: name: fold_s_and_b32__mov_fi_const_copy_to_virt_vgpr
    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.0
    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[S_MOV_B32_]], 128, implicit-def dead $scc
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[S_AND_B32_]]
    ; CHECK-NEXT: SI_RETURN implicit [[COPY]]
    %0:sreg_32 = S_MOV_B32 %stack.0
    %1:sreg_32 = S_AND_B32 %0, 128, implicit-def dead $scc
    %2:vgpr_32 = COPY %1
    SI_RETURN implicit %2
...
# Physreg copy of %2 to $vgpr0 should not be erased: the FI mov is folded into
# the S_ADD_I32 (which stays scalar), and the folded value is consumed by a
# physical-register COPY inside a waterfall loop feeding a call. The pass must
# keep that $vgpr0 = COPY in every loop iteration rather than deleting it as
# part of the fold.
---
name: fold_fi_into_s_or_b32_user_is_physreg_copy
tracksRegLiveness: true
stack:
  - { id: 0, size: 16, alignment: 16 }
machineFunctionInfo:
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr33'
  stackPtrOffsetReg: '$sgpr32'
body: |
  ; CHECK-LABEL: name: fold_fi_into_s_or_b32_user_is_physreg_copy
  ; CHECK: bb.0:
  ; CHECK-NEXT: successors: %bb.1(0x80000000)
  ; CHECK-NEXT: liveins: $vgpr0_vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; CHECK-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 %stack.0, 4, implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
  ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub1, implicit $exec
  ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
  ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE]], [[COPY]], implicit $exec
  ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
  ; CHECK-NEXT: $vgpr0 = COPY [[S_ADD_I32_]]
  ; CHECK-NEXT: $sgpr30_sgpr31 = SI_CALL [[REG_SEQUENCE]], 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
  ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
  ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]]
  ; CHECK-NEXT: SI_RETURN
  bb.0:
    liveins: $vgpr0_vgpr1
    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:sreg_32 = S_MOV_B32 %stack.0
    %2:sreg_32 = S_ADD_I32 killed %1, 4, implicit-def dead $scc
    %3:sreg_64_xexec = S_MOV_B64 $exec
  bb.1:
    %4:sreg_32_xm0 = V_READFIRSTLANE_B32 %0.sub0, implicit $exec
    %5:sreg_32_xm0 = V_READFIRSTLANE_B32 %0.sub1, implicit $exec
    %6:sgpr_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
    %7:sreg_64_xexec = V_CMP_EQ_U64_e64 %6, %0, implicit $exec
    %8:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed %7, implicit-def $exec, implicit-def $scc, implicit $exec
    ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    $vgpr0 = COPY %2
    $sgpr30_sgpr31 = SI_CALL %6, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $vgpr0
    ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32
    $exec = S_XOR_B64_term $exec, %8, implicit-def $scc
    SI_WATERFALL_LOOP %bb.1, implicit $exec
  bb.2:
    $exec = S_MOV_B64 %3
    SI_RETURN
...