| ; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK32 |
| ; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+sahf %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK64 |
| |
| ; TODO: Reenable verify-machineinstrs once the if (!AXDead) // FIXME in |
| ; X86InstrInfo::copyPhysReg() is resolved. |
| |
| ; The peephole optimizer can elide some physical register copies such as |
| ; EFLAGS. Make sure the flags are used directly, instead of needlessly using |
| ; lahf, when possible. |
| |
| @L = external global i32 |
| @M = external global i8 |
| declare i32 @bar(i64) |
| |
| ; CHECK-LABEL: plus_one |
| ; CHECK-NOT: seto |
| ; CHECK-NOT: lahf |
| ; CHECK-NOT: sahf |
| ; CHECK-NOT: pushf |
| ; CHECK-NOT: popf |
| ; CHECK: incl L |
| define i1 @plus_one() { |
| entry: |
| %loaded_L = load i32, i32* @L |
| %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc. |
| store i32 %val, i32* @L |
| %loaded_M = load i8, i8* @M |
| %masked = and i8 %loaded_M, 8 |
| %M_is_true = icmp ne i8 %masked, 0 |
| %L_is_false = icmp eq i32 %val, 0 |
| %cond = and i1 %L_is_false, %M_is_true |
| br i1 %cond, label %exit2, label %exit |
| |
| exit: |
| ret i1 true |
| |
| exit2: |
| ret i1 false |
| } |
| |
| ; CHECK-LABEL: plus_forty_two |
| ; CHECK-NOT: seto |
| ; CHECK-NOT: lahf |
| ; CHECK-NOT: sahf |
| ; CHECK-NOT: pushf |
| ; CHECK-NOT: popf |
| ; CHECK: addl $42, |
| define i1 @plus_forty_two() { |
| entry: |
| %loaded_L = load i32, i32* @L |
| %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc. |
| store i32 %val, i32* @L |
| %loaded_M = load i8, i8* @M |
| %masked = and i8 %loaded_M, 8 |
| %M_is_true = icmp ne i8 %masked, 0 |
| %L_is_false = icmp eq i32 %val, 0 |
| %cond = and i1 %L_is_false, %M_is_true |
| br i1 %cond, label %exit2, label %exit |
| |
| exit: |
| ret i1 true |
| |
| exit2: |
| ret i1 false |
| } |
| |
| ; CHECK-LABEL: minus_one |
| ; CHECK-NOT: seto |
| ; CHECK-NOT: lahf |
| ; CHECK-NOT: sahf |
| ; CHECK-NOT: pushf |
| ; CHECK-NOT: popf |
| ; CHECK: decl L |
| define i1 @minus_one() { |
| entry: |
| %loaded_L = load i32, i32* @L |
| %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec. |
| store i32 %val, i32* @L |
| %loaded_M = load i8, i8* @M |
| %masked = and i8 %loaded_M, 8 |
| %M_is_true = icmp ne i8 %masked, 0 |
| %L_is_false = icmp eq i32 %val, 0 |
| %cond = and i1 %L_is_false, %M_is_true |
| br i1 %cond, label %exit2, label %exit |
| |
| exit: |
| ret i1 true |
| |
| exit2: |
| ret i1 false |
| } |
| |
| ; CHECK-LABEL: minus_forty_two |
| ; CHECK-NOT: seto |
| ; CHECK-NOT: lahf |
| ; CHECK-NOT: sahf |
| ; CHECK-NOT: pushf |
| ; CHECK-NOT: popf |
| ; CHECK: addl $-42, |
| define i1 @minus_forty_two() { |
| entry: |
| %loaded_L = load i32, i32* @L |
| %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec. |
| store i32 %val, i32* @L |
| %loaded_M = load i8, i8* @M |
| %masked = and i8 %loaded_M, 8 |
| %M_is_true = icmp ne i8 %masked, 0 |
| %L_is_false = icmp eq i32 %val, 0 |
| %cond = and i1 %L_is_false, %M_is_true |
| br i1 %cond, label %exit2, label %exit |
| |
| exit: |
| ret i1 true |
| |
| exit2: |
| ret i1 false |
| } |
| |
| ; CHECK-LABEL: test_intervening_call: |
| ; CHECK: cmpxchg |
| ; CHECK: seto %al |
| ; CHECK-NEXT: lahf |
| ; CHECK: call{{[lq]}} bar |
| ; CHECK: addb $127, %al |
| ; CHECK-NEXT: sahf |
| define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) { |
| ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS. |
| %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst |
| %v = extractvalue { i64, i1 } %cx, 0 |
| %p = extractvalue { i64, i1 } %cx, 1 |
| call i32 @bar(i64 %v) |
| br i1 %p, label %t, label %f |
| |
| t: |
| ret i64 42 |
| |
| f: |
| ret i64 0 |
| } |
| |
| ; CHECK-LABEL: test_two_live_flags: |
| ; CHECK: cmpxchg |
| ; CHECK: seto %al |
| ; CHECK-NEXT: lahf |
| ; Save result of the first cmpxchg into a temporary. |
| ; For 32-bit ISA, EDX, EAX are used by the results. |
| ; EAX, EBX, ECX, and EDX are used to set the arguments. |
| ; That leaves us EDI and ESI. |
| ; CHECK32-NEXT: movl %[[AX:eax]], %[[TMP:e[ds]i]] |
| ; For 64-bit ISA, RAX is used for both the result and argument. |
| ; This leaves us plenty of choices for the temporary. For now, |
| ; this is rdx, but any register could do. |
| ; CHECK64-NEXT: mov{{[lq]}} %[[AX:[er]ax]], %[[TMP:rdx]] |
| ; CHECK: cmpxchg |
| ; CHECK-NEXT: sete %al |
| ; Save result of the second cmpxchg onto the stack. |
| ; CHECK-NEXT: push{{[lq]}} %[[AX]] |
| ; Restore result of the first cmpxchg from D, put it back in EFLAGS. |
| ; CHECK-NEXT: mov{{[lq]}} %[[TMP]], %[[AX]] |
| ; CHECK-NEXT: addb $127, %al |
| ; CHECK-NEXT: sahf |
| ; Restore result of the second cmpxchg from the stack. |
| ; CHECK-NEXT: pop{{[lq]}} %[[AX]] |
| ; Test from EFLAGS restored from first cmpxchg, jump if that fails. |
| ; CHECK-NEXT: jne |
| ; Fallthrough to test the second cmpxchg's result. |
| ; CHECK: testb %al, %al |
| ; CHECK-NEXT: je |
| define i64 @test_two_live_flags( |
| i64* %foo0, i64 %bar0, i64 %baz0, |
| i64* %foo1, i64 %bar1, i64 %baz1) { |
| %cx0 = cmpxchg i64* %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst |
| %p0 = extractvalue { i64, i1 } %cx0, 1 |
| %cx1 = cmpxchg i64* %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst |
| %p1 = extractvalue { i64, i1 } %cx1, 1 |
| %flag = and i1 %p0, %p1 |
| br i1 %flag, label %t, label %f |
| |
| t: |
| ret i64 42 |
| |
| f: |
| ret i64 0 |
| } |
| |
| ; CHECK-LABEL: asm_clobbering_flags: |
| ; CHECK: test |
| ; CHECK-NEXT: setg |
| ; CHECK-NEXT: #APP |
| ; CHECK-NEXT: bsfl |
| ; CHECK-NEXT: #NO_APP |
| ; CHECK-NEXT: movl |
| ; CHECK-NEXT: ret |
| define i1 @asm_clobbering_flags(i32* %mem) { |
| %val = load i32, i32* %mem, align 4 |
| %cmp = icmp sgt i32 %val, 0 |
| %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val) |
| store i32 %res, i32* %mem, align 4 |
| ret i1 %cmp |
| } |