| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 |
| ; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s |
| |
| ; External sink; stg128_128_gap_128_128 passes it a stack slot that is never tagged. |
| declare void @use(ptr %p) |
| ; Tagging intrinsic under test: takes a pointer and an i64 size (the tests pass the alloca size). |
| declare void @llvm.aarch64.settag(ptr %p, i64 %a) |
| ; Declared next to settag; no test in the visible part of this file calls it. |
| declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a) |
| |
| ; Two 16-byte stack slots, each tagged by its own settag call. |
| ; The expected output covers both with one post-indexed st2g that also |
| ; releases the 32-byte frame (there is no separate "add sp"). |
| define void @stg16_16() { |
| ; CHECK-LABEL: stg16_16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #32 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NEXT: st2g sp, [sp], #32 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 16) |
| ret void |
| } |
| |
| ; Four 16-byte stack slots tagged back to back in a function that returns 0. |
| ; The expected output materializes the return value (mov w0, wzr) ahead of two |
| ; st2g instructions; the second is post-indexed by 64 and so restores sp. |
| define i32 @stg16_16_16_16_ret() { |
| ; CHECK-LABEL: stg16_16_16_16_ret: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: mov w0, wzr |
| ; CHECK-NEXT: st2g sp, [sp, #32] |
| ; CHECK-NEXT: st2g sp, [sp], #64 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 16, align 16 |
| %c = alloca i8, i32 16, align 16 |
| %d = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 16) |
| call void @llvm.aarch64.settag(ptr %c, i64 16) |
| call void @llvm.aarch64.settag(ptr %d, i64 16) |
| ret i32 0 |
| } |
| |
| ; Same four 16-byte slots as stg16_16_16_16_ret, but returning void. |
| ; The expected output is two st2g instructions, the second post-indexed by 64 |
| ; so that it also restores sp. |
| define void @stg16_16_16_16() { |
| ; CHECK-LABEL: stg16_16_16_16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #64 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 64 |
| ; CHECK-NEXT: st2g sp, [sp, #32] |
| ; CHECK-NEXT: st2g sp, [sp], #64 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 16, align 16 |
| %c = alloca i8, i32 16, align 16 |
| %d = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 16) |
| call void @llvm.aarch64.settag(ptr %c, i64 16) |
| call void @llvm.aarch64.settag(ptr %d, i64 16) |
| ret void |
| } |
| |
| ; Four 128-byte stack slots (512 bytes in total), each tagged separately. |
| ; The expected output is a single loop: x8 counts down from 512 in steps of 32 |
| ; while a post-indexed st2g advances sp, so no separate sp restore follows the |
| ; loop. x29 is spilled on entry and reloaded just before ret. |
| define void @stg128_128_128_128() { |
| ; CHECK-LABEL: stg128_128_128_128: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #512 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 528 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: mov x8, #512 // =0x200 |
| ; CHECK-NEXT: .LBB3_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g sp, [sp], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB3_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 128, align 16 |
| %b = alloca i8, i32 128, align 16 |
| %c = alloca i8, i32 128, align 16 |
| %d = alloca i8, i32 128, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 128) |
| call void @llvm.aarch64.settag(ptr %b, i64 128) |
| call void @llvm.aarch64.settag(ptr %c, i64 128) |
| call void @llvm.aarch64.settag(ptr %d, i64 128) |
| ret void |
| } |
| |
| ; A 512-byte slot with a 16-byte slot on either side (16 + 512 + 16 = 544 bytes). |
| ; The expected output tags all three with one 544-byte st2g loop that |
| ; post-indexes sp, rather than handling the small slots separately. |
| define void @stg16_512_16() { |
| ; CHECK-LABEL: stg16_512_16: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #544 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 560 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: mov x8, #544 // =0x220 |
| ; CHECK-NEXT: .LBB4_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g sp, [sp], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB4_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 512, align 16 |
| %c = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 512) |
| call void @llvm.aarch64.settag(ptr %c, i64 16) |
| ret void |
| } |
| |
| ; Three 512-byte stack slots (1536 bytes in total), each tagged separately. |
| ; The expected output is one st2g loop over all 1536 bytes that post-indexes sp. |
| define void @stg512_512_512() { |
| ; CHECK-LABEL: stg512_512_512: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1536 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1552 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: mov x8, #1536 // =0x600 |
| ; CHECK-NEXT: .LBB5_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g sp, [sp], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB5_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 512, align 16 |
| %b = alloca i8, i32 512, align 16 |
| %c = alloca i8, i32 512, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 512) |
| call void @llvm.aarch64.settag(ptr %b, i64 512) |
| call void @llvm.aarch64.settag(ptr %c, i64 512) |
| ret void |
| } |
| |
| ; Three 48-byte slots where %a and %b are tagged only when %flag is set and %c |
| ; is tagged in the block that returns. The expected output keeps the conditional |
| ; tagging as three plain st2g stores (96 bytes at sp+48..sp+143) and folds only |
| ; the final 48 bytes (stg + post-indexed st2g) into the sp restore. |
| define void @early(i1 %flag) { |
| ; CHECK-LABEL: early: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #144 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 144 |
| ; CHECK-NEXT: tbz w0, #0, .LBB6_2 |
| ; CHECK-NEXT: // %bb.1: // %if.then |
| ; CHECK-NEXT: st2g sp, [sp, #48] |
| ; CHECK-NEXT: st2g sp, [sp, #80] |
| ; CHECK-NEXT: st2g sp, [sp, #112] |
| ; CHECK-NEXT: .LBB6_2: // %if.end |
| ; CHECK-NEXT: stg sp, [sp, #32] |
| ; CHECK-NEXT: st2g sp, [sp], #144 |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 48, align 16 |
| %b = alloca i8, i32 48, align 16 |
| %c = alloca i8, i32 48, align 16 |
| br i1 %flag, label %if.then, label %if.end |
| |
| ; Conditional path: tags %a and %b only. |
| if.then: |
| call void @llvm.aarch64.settag(ptr %a, i64 48) |
| call void @llvm.aarch64.settag(ptr %b, i64 48) |
| br label %if.end |
| |
| ; Common exit: tags %c immediately before returning. |
| if.end: |
| call void @llvm.aarch64.settag(ptr %c, i64 48) |
| ret void |
| } |
| |
| ; Variant of @early with two 128-byte slots tagged on the %flag path and a |
| ; 48-byte slot tagged in the returning block. The expected output tags the |
| ; conditional 256 bytes with a loop that walks a scratch pointer (x9 = sp+48) |
| ; and leaves sp untouched; only the if.end tagging post-indexes sp (by 304), |
| ; after which x29 is reloaded. |
| define void @early_128_128(i1 %flag) { |
| ; CHECK-LABEL: early_128_128: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: sub sp, sp, #320 |
| ; CHECK-NEXT: str x29, [sp, #304] // 8-byte Folded Spill |
| ; CHECK-NEXT: .cfi_def_cfa_offset 320 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: tbz w0, #0, .LBB7_4 |
| ; CHECK-NEXT: // %bb.1: // %if.then |
| ; CHECK-NEXT: add x9, sp, #48 |
| ; CHECK-NEXT: mov x8, #256 // =0x100 |
| ; CHECK-NEXT: .LBB7_2: // %if.then |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g x9, [x9], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB7_2 |
| ; CHECK-NEXT: // %bb.3: // %if.then |
| ; CHECK-NEXT: .LBB7_4: // %if.end |
| ; CHECK-NEXT: stg sp, [sp, #32] |
| ; CHECK-NEXT: st2g sp, [sp], #304 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 128, align 16 |
| %b = alloca i8, i32 128, align 16 |
| %c = alloca i8, i32 48, align 16 |
| br i1 %flag, label %if.then, label %if.end |
| |
| ; Conditional path: tags %a and %b only. |
| if.then: |
| call void @llvm.aarch64.settag(ptr %a, i64 128) |
| call void @llvm.aarch64.settag(ptr %b, i64 128) |
| br label %if.end |
| |
| ; Common exit: tags %c immediately before returning. |
| if.end: |
| call void @llvm.aarch64.settag(ptr %c, i64 48) |
| ret void |
| } |
| |
| ; Variant of @early with two 512-byte slots tagged on the %flag path and a |
| ; 48-byte slot tagged in the returning block. The expected output tags the |
| ; conditional 1024 bytes with a loop that walks a scratch pointer (x9 = sp+48) |
| ; and leaves sp untouched; only the if.end tagging post-indexes sp (by 1072), |
| ; after which x29 is reloaded. |
| define void @early_512_512(i1 %flag) { |
| ; CHECK-LABEL: early_512_512: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #1072 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 1088 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: tbz w0, #0, .LBB8_4 |
| ; CHECK-NEXT: // %bb.1: // %if.then |
| ; CHECK-NEXT: add x9, sp, #48 |
| ; CHECK-NEXT: mov x8, #1024 // =0x400 |
| ; CHECK-NEXT: .LBB8_2: // %if.then |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g x9, [x9], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB8_2 |
| ; CHECK-NEXT: // %bb.3: // %if.then |
| ; CHECK-NEXT: .LBB8_4: // %if.end |
| ; CHECK-NEXT: stg sp, [sp, #32] |
| ; CHECK-NEXT: st2g sp, [sp], #1072 |
| ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 512, align 16 |
| %b = alloca i8, i32 512, align 16 |
| %c = alloca i8, i32 48, align 16 |
| br i1 %flag, label %if.then, label %if.end |
| |
| ; Conditional path: tags %a and %b only. |
| if.then: |
| call void @llvm.aarch64.settag(ptr %a, i64 512) |
| call void @llvm.aarch64.settag(ptr %b, i64 512) |
| br label %if.end |
| |
| ; Common exit: tags %c immediately before returning. |
| if.end: |
| call void @llvm.aarch64.settag(ptr %c, i64 48) |
| ret void |
| } |
| |
| ; Two loops of size 256; the second loop updates SP. |
| ; The 32-byte slot %b is passed to @use and never tagged, so it forms a gap |
| ; (at sp+256 in the expected output) between the two tagged 256-byte regions. |
| ; The first loop walks a scratch pointer (x9) from sp; sp is then advanced by |
| ; 288 (256 tagged + 32 gap) and the second loop post-indexes sp itself. |
| define void @stg128_128_gap_128_128() { |
| ; CHECK-LABEL: stg128_128_gap_128_128: |
| ; CHECK: // %bb.0: // %entry |
| ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill |
| ; CHECK-NEXT: sub sp, sp, #544 |
| ; CHECK-NEXT: .cfi_def_cfa_offset 560 |
| ; CHECK-NEXT: .cfi_offset w30, -8 |
| ; CHECK-NEXT: .cfi_offset w29, -16 |
| ; CHECK-NEXT: add x0, sp, #256 |
| ; CHECK-NEXT: bl use |
| ; CHECK-NEXT: mov x9, sp |
| ; CHECK-NEXT: mov x8, #256 // =0x100 |
| ; CHECK-NEXT: .LBB9_1: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g x9, [x9], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB9_1 |
| ; CHECK-NEXT: // %bb.2: // %entry |
| ; CHECK-NEXT: add sp, sp, #288 |
| ; CHECK-NEXT: mov x8, #256 // =0x100 |
| ; CHECK-NEXT: .LBB9_3: // %entry |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: st2g sp, [sp], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB9_3 |
| ; CHECK-NEXT: // %bb.4: // %entry |
| ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload |
| ; CHECK-NEXT: ret |
| entry: |
| %a = alloca i8, i32 128, align 16 |
| %a2 = alloca i8, i32 128, align 16 |
| %b = alloca i8, i32 32, align 16 |
| %c = alloca i8, i32 128, align 16 |
| %c2 = alloca i8, i32 128, align 16 |
| ; %b is handed to an external function and is the only slot left untagged. |
| call void @use(ptr %b) |
| call void @llvm.aarch64.settag(ptr %a, i64 128) |
| call void @llvm.aarch64.settag(ptr %a2, i64 128) |
| call void @llvm.aarch64.settag(ptr %c, i64 128) |
| call void @llvm.aarch64.settag(ptr %c2, i64 128) |
| ret void |
| } |
| |
| ; printf and the format string below are referenced only from the return0 |
| ; blocks of the nzcv_clobber / nzcv_no_clobber tests. |
| ; Function Attrs: nounwind |
| declare i32 @printf(ptr, ...) #0 |
| |
| @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 |
| |
| ; Case 1 |
| ; The insertion point of the merged stg sequence is followed by an NZCV read |
| ; (the b.ge that consumes the cmp), so the last 16-byte settag must not be |
| ; merged into the flag-setting subs loop. The expected output keeps it as a |
| ; standalone stg placed between the cmp and the branch. |
| |
| define i32 @nzcv_clobber(i32 %in) { |
| entry: |
| ; CHECK-LABEL: nzcv_clobber: |
| ; CHECK: stg sp, [sp, #528] |
| ; CHECK-NEXT: .LBB10_1: |
| ; CHECK: st2g x9, [x9], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB10_1 |
| ; CHECK-NEXT: // %bb.2: |
| ; CHECK-NEXT: cmp w0, #10 |
| ; CHECK-NEXT: stg sp, [sp] |
| ; CHECK-NEXT: b.ge .LBB10_4 |
| |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 512, align 16 |
| %c = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 512) |
| ; The compare sits before the last settag and its result is consumed by the |
| ; branch after it, so the flags are live across the tagging of %c. |
| %cmp = icmp slt i32 %in, 10 |
| call void @llvm.aarch64.settag(ptr %c, i64 16) |
| br i1 %cmp, label %return0, label %return1 |
| |
| return0: ; preds = %entry |
| %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1 |
| ret i32 0 |
| |
| return1: |
| ret i32 1 |
| } |
| |
| ; Case 2 |
| ; Nothing reads NZCV after the insertion point of the merged stg sequence, so |
| ; all three settags (16 + 512 + 16 bytes) are expected to merge into a single |
| ; 544-byte st2g loop that post-indexes sp. |
| |
| define i32 @nzcv_no_clobber(i32 %in) { |
| entry: |
| ; CHECK-LABEL: nzcv_no_clobber: |
| ; CHECK: mov x8, #544 |
| ; CHECK-NEXT: .LBB11_1: |
| ; CHECK: st2g sp, [sp], #32 |
| ; CHECK-NEXT: subs x8, x8, #32 |
| ; CHECK-NEXT: b.ne .LBB11_1 |
| |
| |
| %a = alloca i8, i32 16, align 16 |
| %b = alloca i8, i32 512, align 16 |
| %c = alloca i8, i32 16, align 16 |
| call void @llvm.aarch64.settag(ptr %a, i64 16) |
| call void @llvm.aarch64.settag(ptr %b, i64 512) |
| call void @llvm.aarch64.settag(ptr %c, i64 16) |
| ; Unconditional branch: unlike Case 1 there is no compare feeding it. |
| br label %return1 |
| |
| return0: ; No predecessors! |
| %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1 |
| ret i32 0 |
| |
| return1: |
| ret i32 1 |
| } |