| # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py |
| # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s |
| |
# Loop body under analysis (AT&T operand order: sources first, destination last).
#
# Registers as used by the visible code:
#   %rsi + 2*%rax   address of the 16-byte load
#   %rdx + %rax     address of the 16-byte store
#   %rax            offset, advanced by 16 each iteration
#   %ecx            down-counter; the loop repeats while it is non-zero
#   %xmm9-%xmm15 and -24(%rsp) are only read here (multiplicands of the chain).
#   %xmm1-%xmm4 are carried across iterations: each is read by an FMA below
#   and then reloaded with a lane splat for the next trip around the loop.
# NOTE(review): the shape resembles a recursive-filter kernel mixing current and
# previous inputs/outputs; that is inferred from the data flow, not stated here.
| .LBB0_4: |
| vmovups (%rsi,%rax,2), %xmm0 |
# Splat each 32-bit lane of the loaded vector:
# $255 -> lane 3, $170 -> lane 2, $85 -> lane 1, vbroadcastss -> lane 0.
| vpermilps $255, %xmm0, %xmm7 |
# xmm8 = xmm7 * mem[-24(%rsp)]; starts the accumulation.
| vmulps -24(%rsp), %xmm7, %xmm8 |
| vpermilps $170, %xmm0, %xmm6 |
| vpermilps $85, %xmm0, %xmm5 |
| vbroadcastss %xmm0, %xmm0 |
# Serial FMA chain: every step consumes the previous step's result, so the
# steps cannot overlap. 231 form: dst = src2 * src3 + dst.
| vfmadd231ps %xmm9, %xmm6, %xmm8 |
# 213 form: dst = src2 * dst + src3, i.e. the running sum enters as the addend
# and leaves in a different register at each step.
| vfmadd213ps %xmm8, %xmm10, %xmm5 |
| vfmadd213ps %xmm5, %xmm11, %xmm0 |
# xmm4 still holds the value carried in from the previous iteration.
| vfmadd213ps %xmm0, %xmm12, %xmm4 |
| vfmadd213ps %xmm4, %xmm13, %xmm1 |
# xmm4 has been consumed; reload it with this iteration's lane-3 splat.
| vmovaps %xmm7, %xmm4 |
| vfmadd213ps %xmm1, %xmm14, %xmm2 |
# xmm1 has been consumed; reload it with this iteration's lane-2 splat.
| vmovaps %xmm6, %xmm1 |
# Final step of the chain: xmm3 is the vector that gets stored.
| vfmadd213ps %xmm2, %xmm15, %xmm3 |
# Lane-2 splat of the result, parked in xmm0 until the store has read xmm3.
| vpermilps $170, %xmm3, %xmm0 |
| vmovups %xmm3, (%rdx,%rax) |
# Lane-3 splat of the result, carried to the next iteration in xmm2.
| vpermilps $255, %xmm3, %xmm2 |
| addq $16, %rax |
# decl sets ZF for the jne below; the vmovaps in between leaves flags alone.
| decl %ecx |
# Carry the lane-2 splat of the result into the next iteration via xmm3.
| vmovaps %xmm0, %xmm3 |
| jne .LBB0_4 |
| |
| # CHECK: Iterations: 100 |
| # CHECK-NEXT: Instructions: 2200 |
| # CHECK-NEXT: Total Cycles: 1039 |
| # CHECK-NEXT: Total uOps: 2400 |
| |
| # CHECK: Dispatch Width: 6 |
| # CHECK-NEXT: uOps Per Cycle: 2.31 |
| # CHECK-NEXT: IPC: 2.12 |
| # CHECK-NEXT: Block RThroughput: 6.0 |
| |
| # CHECK: Cycles with backend pressure increase [ 92.69% ] |
| # CHECK-NEXT: Throughput Bottlenecks: |
| # CHECK-NEXT: Resource Pressure [ 46.78% ] |
| # CHECK-NEXT: - SKLPort0 [ 14.24% ] |
| # CHECK-NEXT: - SKLPort1 [ 14.24% ] |
| # CHECK-NEXT: - SKLPort5 [ 46.49% ] |
| # CHECK-NEXT: - SKLPort6 [ 8.66% ] |
| # CHECK-NEXT: Data Dependencies: [ 64.97% ] |
| # CHECK-NEXT: - Register Dependencies [ 64.97% ] |
| # CHECK-NEXT: - Memory Dependencies [ 0.00% ] |
| |
| # CHECK: Critical sequence based on the simulation: |
| |
| # CHECK: Instruction Dependency Information |
| # CHECK-NEXT: +----< 18. addq $16, %rax |
| # CHECK-NEXT: | |
| # CHECK-NEXT: | < loop carried > |
| # CHECK-NEXT: | |
| # CHECK-NEXT: +----> 0. vmovups (%rsi,%rax,2), %xmm0 ## REGISTER dependency: %rax |
| # CHECK-NEXT: | 1. vpermilps $255, %xmm0, %xmm7 |
| # CHECK-NEXT: | 2. vmulps -24(%rsp), %xmm7, %xmm8 |
| # CHECK-NEXT: +----> 3. vpermilps $170, %xmm0, %xmm6 ## REGISTER dependency: %xmm0 |
| # CHECK-NEXT: | 4. vpermilps $85, %xmm0, %xmm5 |
| # CHECK-NEXT: | 5. vbroadcastss %xmm0, %xmm0 |
| # CHECK-NEXT: +----> 6. vfmadd231ps %xmm9, %xmm6, %xmm8 ## REGISTER dependency: %xmm6 |
| # CHECK-NEXT: +----> 7. vfmadd213ps %xmm8, %xmm10, %xmm5 ## REGISTER dependency: %xmm8 |
| # CHECK-NEXT: +----> 8. vfmadd213ps %xmm5, %xmm11, %xmm0 ## REGISTER dependency: %xmm5 |
| # CHECK-NEXT: +----> 9. vfmadd213ps %xmm0, %xmm12, %xmm4 ## REGISTER dependency: %xmm0 |
| # CHECK-NEXT: +----> 10. vfmadd213ps %xmm4, %xmm13, %xmm1 ## REGISTER dependency: %xmm4 |
| # CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4 |
| # CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1 |
| # CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1 |
| # CHECK-NEXT: +----> 14. vfmadd213ps %xmm2, %xmm15, %xmm3 ## REGISTER dependency: %xmm2 |
| # CHECK-NEXT: +----> 15. vpermilps $170, %xmm3, %xmm0 ## REGISTER dependency: %xmm3 |
| # CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax) |
| # CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2 |
| # CHECK-NEXT: | 18. addq $16, %rax |
| # CHECK-NEXT: | 19. decl %ecx |
| # CHECK-NEXT: +----> 20. vmovaps %xmm0, %xmm3 ## REGISTER dependency: %xmm0 |
| # CHECK-NEXT: 21. jne .LBB0_4 |
| |
| # CHECK: Instruction Info: |
| # CHECK-NEXT: [1]: #uOps |
| # CHECK-NEXT: [2]: Latency |
| # CHECK-NEXT: [3]: RThroughput |
| # CHECK-NEXT: [4]: MayLoad |
| # CHECK-NEXT: [5]: MayStore |
| # CHECK-NEXT: [6]: HasSideEffects (U) |
| |
| # CHECK: [1] [2] [3] [4] [5] [6] Instructions: |
| # CHECK-NEXT: 1 6 0.50 * vmovups (%rsi,%rax,2), %xmm0 |
| # CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm0, %xmm7 |
| # CHECK-NEXT: 2 10 0.50 * vmulps -24(%rsp), %xmm7, %xmm8 |
| # CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm0, %xmm6 |
| # CHECK-NEXT: 1 1 1.00 vpermilps $85, %xmm0, %xmm5 |
| # CHECK-NEXT: 1 1 1.00 vbroadcastss %xmm0, %xmm0 |
| # CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm9, %xmm6, %xmm8 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm8, %xmm10, %xmm5 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm5, %xmm11, %xmm0 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm12, %xmm4 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm4, %xmm13, %xmm1 |
| # CHECK-NEXT: 1 1 0.33 vmovaps %xmm7, %xmm4 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm1, %xmm14, %xmm2 |
| # CHECK-NEXT: 1 1 0.33 vmovaps %xmm6, %xmm1 |
| # CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm2, %xmm15, %xmm3 |
| # CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm3, %xmm0 |
| # CHECK-NEXT: 2 1 1.00 * vmovups %xmm3, (%rdx,%rax) |
| # CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm3, %xmm2 |
| # CHECK-NEXT: 1 1 0.25 addq $16, %rax |
| # CHECK-NEXT: 1 1 0.25 decl %ecx |
| # CHECK-NEXT: 1 1 0.33 vmovaps %xmm0, %xmm3 |
| # CHECK-NEXT: 1 1 0.50 jne .LBB0_4 |
| |
| # CHECK: Resources: |
| # CHECK-NEXT: [0] - SKLDivider |
| # CHECK-NEXT: [1] - SKLFPDivider |
| # CHECK-NEXT: [2] - SKLPort0 |
| # CHECK-NEXT: [3] - SKLPort1 |
| # CHECK-NEXT: [4] - SKLPort2 |
| # CHECK-NEXT: [5] - SKLPort3 |
| # CHECK-NEXT: [6] - SKLPort4 |
| # CHECK-NEXT: [7] - SKLPort5 |
| # CHECK-NEXT: [8] - SKLPort6 |
| # CHECK-NEXT: [9] - SKLPort7 |
| |
| # CHECK: Resource pressure per iteration: |
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] |
| # CHECK-NEXT: - - 5.52 5.53 1.01 1.03 1.00 6.02 2.93 0.96 |
| |
| # CHECK: Resource pressure by instruction: |
| # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: |
| # CHECK-NEXT: - - - - 0.04 0.96 - - - - vmovups (%rsi,%rax,2), %xmm0 |
| # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm0, %xmm7 |
| # CHECK-NEXT: - - 0.03 0.97 0.96 0.04 - - - - vmulps -24(%rsp), %xmm7, %xmm8 |
| # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm0, %xmm6 |
| # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $85, %xmm0, %xmm5 |
| # CHECK-NEXT: - - - - - - - 1.00 - - vbroadcastss %xmm0, %xmm0 |
| # CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd231ps %xmm9, %xmm6, %xmm8 |
| # CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm8, %xmm10, %xmm5 |
| # CHECK-NEXT: - - 0.92 0.08 - - - - - - vfmadd213ps %xmm5, %xmm11, %xmm0 |
| # CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd213ps %xmm0, %xmm12, %xmm4 |
| # CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm4, %xmm13, %xmm1 |
| # CHECK-NEXT: - - 0.52 0.48 - - - - - - vmovaps %xmm7, %xmm4 |
| # CHECK-NEXT: - - 0.49 0.51 - - - - - - vfmadd213ps %xmm1, %xmm14, %xmm2 |
| # CHECK-NEXT: - - 0.04 0.95 - - - 0.01 - - vmovaps %xmm6, %xmm1 |
| # CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm2, %xmm15, %xmm3 |
| # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm3, %xmm0 |
| # CHECK-NEXT: - - - - 0.01 0.03 1.00 - - 0.96 vmovups %xmm3, (%rdx,%rax) |
| # CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm3, %xmm2 |
| # CHECK-NEXT: - - - - - - - - 1.00 - addq $16, %rax |
| # CHECK-NEXT: - - 0.04 0.01 - - - 0.01 0.94 - decl %ecx |
| # CHECK-NEXT: - - 0.05 0.95 - - - - - - vmovaps %xmm0, %xmm3 |
| # CHECK-NEXT: - - 0.01 - - - - - 0.99 - jne .LBB0_4 |