| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S | FileCheck -enable-var-scope %s --check-prefix=OPTALL --check-prefixes=OPT,NONSTRESS |
| ; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -stress-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,OPT,STRESS |
| ; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' < %s -mtriple=aarch64-apple-ios -S -disable-cgp-ext-ld-promotion | FileCheck -enable-var-scope %s --check-prefixes=OPTALL,DISABLE |
| |
| ; CodeGenPrepare should move the zext into the block with the load |
| ; so that SelectionDAG can select it with the load. |
| ; In the input the load is in %entry while its zext is in %true; every run |
| ; configuration expects the zext to end up right after the load. |
define void @foo(ptr %p, ptr %q) { |
| ; OPTALL-LABEL: define void @foo( |
| ; OPTALL-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { |
| ; OPTALL-NEXT: [[ENTRY:.*:]] |
| ; OPTALL-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; OPTALL-NEXT: [[S:%.*]] = zext i8 [[T]] to i32 |
| ; OPTALL-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; OPTALL-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; OPTALL: [[TRUE]]: |
| ; OPTALL-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; OPTALL-NEXT: ret void |
| ; OPTALL: [[FALSE]]: |
| ; OPTALL-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; %t has a second user besides the zext: the comparison below. |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| ; The extension that is expected to be moved next to the load. |
| %s = zext i8 %t to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a zextload if an operation with only one |
| ; argument to explicitly extend is in the way. |
| ; Here the operation is `add nuw i8 %t, 2`: with promotion enabled the add is |
| ; expected to be done in i32 on a zext placed right after the load. |
| ; Make sure the operation is not promoted when promotion is disabled |
| ; (-disable-cgp-ext-ld-promotion). |
define void @promoteOneArg(ptr %p, ptr %q) { |
| ; OPT-LABEL: define void @promoteOneArg( |
| ; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; OPT-NEXT: [[PROMOTED:%.*]] = zext i8 [[T]] to i32 |
| ; OPT-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED]], 2 |
| ; OPT-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; OPT-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; OPT: [[TRUE]]: |
| ; OPT-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; OPT-NEXT: ret void |
| ; OPT: [[FALSE]]: |
| ; OPT-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteOneArg( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], 2 |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; The operation sitting between the load and the zext; its other operand is |
| ; a constant. |
| %add = add nuw i8 %t, 2 |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| %s = zext i8 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a sextload if an operation with only one |
| ; argument to explicitly extend is in the way. |
| ; Version with sext: the operation is `add nsw i8 %t, 2`, and with promotion |
| ; enabled the add is expected to be done in i32 on a sext of the load. |
define void @promoteOneArgSExt(ptr %p, ptr %q) { |
| ; OPT-LABEL: define void @promoteOneArgSExt( |
| ; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; OPT-NEXT: [[PROMOTED:%.*]] = sext i8 [[T]] to i32 |
| ; OPT-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED]], 2 |
| ; OPT-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; OPT-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; OPT: [[TRUE]]: |
| ; OPT-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; OPT-NEXT: ret void |
| ; OPT: [[FALSE]]: |
| ; OPT-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteOneArgSExt( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], 2 |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; The operation sitting between the load and the sext; its other operand is |
| ; a constant. |
| %add = add nsw i8 %t, 2 |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| %s = sext i8 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a zextload if an operation with two |
| ; arguments to explicitly extend is in the way. |
| ; Extending %add will create two extensions: |
| ; 1. One for %b. |
| ; 2. One for %t. |
| ; #1 will not be removed as we do not know anything about %b. |
| ; #2 may not be merged with the load because %t is used in a comparison. |
| ; Since two extensions may be emitted in the end instead of one before the |
| ; transformation, the regular heuristic does not apply the optimization. |
| ; Only the run with -stress-cgp-ext-ld-promotion is expected to promote %add; |
| ; the default run and the run with promotion disabled expect the same output. |
define void @promoteTwoArgZext(ptr %p, ptr %q, i8 %b) { |
| ; NONSTRESS-LABEL: define void @promoteTwoArgZext( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], [[B]] |
| ; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; NONSTRESS: [[TRUE]]: |
| ; NONSTRESS-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32 |
| ; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; NONSTRESS: [[FALSE]]: |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @promoteTwoArgZext( |
| ; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED:%.*]] = zext i8 [[T]] to i32 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i8 [[B]] to i32 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; STRESS: [[TRUE]]: |
| ; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; STRESS-NEXT: ret void |
| ; STRESS: [[FALSE]]: |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteTwoArgZext( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[T]], [[B]] |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; Both operands are non-constant: the loaded value and the argument %b. |
| %add = add nuw i8 %t, %b |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| %s = zext i8 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a sextload if an operation with two |
| ; arguments to explicitly extend is in the way. |
| ; Version with sext. |
| ; Only the run with -stress-cgp-ext-ld-promotion is expected to promote %add; |
| ; the default run and the run with promotion disabled keep the sext in %true. |
define void @promoteTwoArgSExt(ptr %p, ptr %q, i8 %b) { |
| ; NONSTRESS-LABEL: define void @promoteTwoArgSExt( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], [[B]] |
| ; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; NONSTRESS: [[TRUE]]: |
| ; NONSTRESS-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32 |
| ; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; NONSTRESS: [[FALSE]]: |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @promoteTwoArgSExt( |
| ; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED:%.*]] = sext i8 [[T]] to i32 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i8 [[B]] to i32 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; STRESS: [[TRUE]]: |
| ; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; STRESS-NEXT: ret void |
| ; STRESS: [[FALSE]]: |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteTwoArgSExt( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i8 [[T]], [[B]] |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = sext i8 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; Both operands are non-constant: the loaded value and the argument %b. |
| %add = add nsw i8 %t, %b |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| %s = sext i8 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we do not form a zextload if we need to introduce more than |
| ; one additional extension. |
| ; Promoting %add through %tmp requires extensions of %t, %b and %c; only the |
| ; run with -stress-cgp-ext-ld-promotion is expected to do so. |
define void @promoteThreeArgZext(ptr %p, ptr %q, i8 %b, i8 %c) { |
| ; NONSTRESS-LABEL: define void @promoteThreeArgZext( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[TMP:%.*]] = add nuw i8 [[T]], [[B]] |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]] |
| ; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; NONSTRESS: [[TRUE]]: |
| ; NONSTRESS-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32 |
| ; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; NONSTRESS: [[FALSE]]: |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @promoteThreeArgZext( |
| ; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32 |
| ; STRESS-NEXT: [[PROMOTED3:%.*]] = zext i8 [[B]] to i32 |
| ; STRESS-NEXT: [[TMP:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED3]] |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i8 [[C]] to i32 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[TMP]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; STRESS: [[TRUE]]: |
| ; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; STRESS-NEXT: ret void |
| ; STRESS: [[FALSE]]: |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteThreeArgZext( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i8 [[B:%.*]], i8 [[C:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[TMP:%.*]] = add nuw i8 [[T]], [[B]] |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i8 [[TMP]], [[C]] |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = zext i8 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; Chain of two adds between the load and the zext, each bringing in one |
| ; more non-constant operand. |
| %tmp = add nuw i8 %t, %b |
| %add = add nuw i8 %tmp, %c |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| %s = zext i8 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a zextload after promoting and merging |
| ; two extensions. |
| ; The input extends %t to i16, adds the i16 argument %b, then extends the sum |
| ; to i32. With -stress-cgp-ext-ld-promotion the two extensions of %t are |
| ; expected to merge into a single zext from i8 to i32; the default run and the |
| ; run with promotion disabled expect the instructions to stay where they are. |
define void @promoteMergeExtArgZExt(ptr %p, ptr %q, i16 %b) { |
| ; NONSTRESS-LABEL: define void @promoteMergeExtArgZExt( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]] |
| ; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; NONSTRESS: [[TRUE]]: |
| ; NONSTRESS-NEXT: [[S:%.*]] = zext i16 [[ADD]] to i32 |
| ; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; NONSTRESS: [[FALSE]]: |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @promoteMergeExtArgZExt( |
| ; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i16 [[B]] to i32 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; STRESS: [[TRUE]]: |
| ; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; STRESS-NEXT: ret void |
| ; STRESS: [[FALSE]]: |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteMergeExtArgZExt( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i16 [[EXT]], [[B]] |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = zext i16 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; First extension: i8 -> i16. |
| %ext = zext i8 %t to i16 |
| %add = add nuw i16 %ext, %b |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| ; Second extension: i16 -> i32, applied to the sum. |
| %s = zext i16 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to form a sextload after promoting and merging |
| ; two extensions. |
| ; Version with sext: the outer extension is a sext while the inner one (of %t) |
| ; is still a zext. With -stress-cgp-ext-ld-promotion the expected output |
| ; extends %t with a single zext from i8 to i32 and sign-extends %b; the |
| ; default run and the run with promotion disabled keep the sext in %true. |
define void @promoteMergeExtArgSExt(ptr %p, ptr %q, i16 %b) { |
| ; NONSTRESS-LABEL: define void @promoteMergeExtArgSExt( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]] |
| ; NONSTRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; NONSTRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; NONSTRESS: [[TRUE]]: |
| ; NONSTRESS-NEXT: [[S:%.*]] = sext i16 [[ADD]] to i32 |
| ; NONSTRESS-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; NONSTRESS: [[FALSE]]: |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @promoteMergeExtArgSExt( |
| ; STRESS-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i32 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i16 [[B]] to i32 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; STRESS-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; STRESS: [[TRUE]]: |
| ; STRESS-NEXT: store i32 [[ADD]], ptr [[Q]], align 4 |
| ; STRESS-NEXT: ret void |
| ; STRESS: [[FALSE]]: |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteMergeExtArgSExt( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i16 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[EXT:%.*]] = zext i8 [[T]] to i16 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i16 [[EXT]], [[B]] |
| ; DISABLE-NEXT: [[A:%.*]] = icmp slt i8 [[T]], 20 |
| ; DISABLE-NEXT: br i1 [[A]], label %[[TRUE:.*]], label %[[FALSE:.*]] |
| ; DISABLE: [[TRUE]]: |
| ; DISABLE-NEXT: [[S:%.*]] = sext i16 [[ADD]] to i32 |
| ; DISABLE-NEXT: store i32 [[S]], ptr [[Q]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; DISABLE: [[FALSE]]: |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| ; Inner extension: zero-extension i8 -> i16. |
| %ext = zext i8 %t to i16 |
| %add = add nsw i16 %ext, %b |
| %a = icmp slt i8 %t, 20 |
| br i1 %a, label %true, label %false |
| true: |
| ; Outer extension: sign-extension i16 -> i32, applied to the sum. |
| %s = sext i16 %add to i32 |
| store i32 %s, ptr %q |
| ret void |
| false: |
| ret void |
| } |
| |
| ; Check that we manage to catch all the extload opportunities that are exposed |
| ; by the different iterations of codegen prepare. |
| ; Moreover, check that we do not promote more than we need to. |
| ; Here is what is happening in this test (not necessarily in this order): |
| ; 1. We try to promote the operand of %sextadd. |
| ; a. This creates one sext of %ld2 and one of %zextld |
| ; b. The sext of %ld2 can be combined with %ld2, so we remove one sext but |
| ; introduce one. This is fine with the current heuristic: neutral. |
| ; => We have one zext of %zextld left and we created one sext of %ld2. |
| ; 2. We try to promote the operand of %sextaddza. |
| ; a. This creates one sext of %zexta and one of %zextld |
| ; b. The sext of %zexta can be combined with the zext of %a. |
| ; c. The sext of %zextld leads to %ld and can be combined with it. This is |
| ; done by promoting %zextld. This is fine with the current heuristic: |
| ; neutral. |
| ; => We have created a new zext of %ld and we created one sext of %zexta. |
| ; 3. We try to promote the operand of %sextaddb. |
| ; a. This creates one sext of %b and one of %zextld |
| ; b. The sext of %b is a dead-end, nothing to be done. |
| ; c. Same thing as 2.c. happens. |
| ; => We have created a new zext of %ld and we created one sext of %b. |
| ; 4. We try to promote the operand of the zext of %zextld introduced in #1. |
| ; a. Same thing as 2.c. happens. |
| ; b. %zextld does not have any other uses. It is removed as dead code. |
| ; => We have created a new zext of %ld and we removed a zext of %zextld and |
| ; a zext of %ld. |
| ; Currently we do not try to reuse existing extensions, so in the end we have |
| ; 3 identical zexts of %ld. The extensions will be CSE'ed by SDag. |
define void @severalPromotions(ptr %addr1, ptr %addr2, i8 %a, i32 %b) { |
| ; OPT-LABEL: define void @severalPromotions( |
| ; OPT-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) { |
| ; OPT-NEXT: [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1 |
| ; OPT-NEXT: [[PROMOTED9:%.*]] = zext i8 [[LD]] to i64 |
| ; OPT-NEXT: [[PROMOTED6:%.*]] = zext i8 [[LD]] to i64 |
| ; OPT-NEXT: [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4 |
| ; OPT-NEXT: [[PROMOTED:%.*]] = sext i32 [[LD2]] to i64 |
| ; OPT-NEXT: [[PROMOTED2:%.*]] = zext i8 [[LD]] to i64 |
| ; OPT-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED2]] |
| ; OPT-NEXT: [[PROMOTED5:%.*]] = zext i8 [[A]] to i64 |
| ; OPT-NEXT: [[ADDZA:%.*]] = add nsw i64 [[PROMOTED5]], [[PROMOTED6]] |
| ; OPT-NEXT: [[PROMOTED7:%.*]] = sext i32 [[B]] to i64 |
| ; OPT-NEXT: [[ADDB:%.*]] = add nsw i64 [[PROMOTED7]], [[PROMOTED9]] |
| ; OPT-NEXT: call void @dummy(i64 [[ADD]], i64 [[ADDZA]], i64 [[ADDB]]) |
| ; OPT-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @severalPromotions( |
| ; DISABLE-SAME: ptr [[ADDR1:%.*]], ptr [[ADDR2:%.*]], i8 [[A:%.*]], i32 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[LD:%.*]] = load i8, ptr [[ADDR1]], align 1 |
| ; DISABLE-NEXT: [[ZEXTLD:%.*]] = zext i8 [[LD]] to i32 |
| ; DISABLE-NEXT: [[LD2:%.*]] = load i32, ptr [[ADDR2]], align 4 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[LD2]], [[ZEXTLD]] |
| ; DISABLE-NEXT: [[SEXTADD:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[ZEXTA:%.*]] = zext i8 [[A]] to i32 |
| ; DISABLE-NEXT: [[ADDZA:%.*]] = add nsw i32 [[ZEXTA]], [[ZEXTLD]] |
| ; DISABLE-NEXT: [[SEXTADDZA:%.*]] = sext i32 [[ADDZA]] to i64 |
| ; DISABLE-NEXT: [[ADDB:%.*]] = add nsw i32 [[B]], [[ZEXTLD]] |
| ; DISABLE-NEXT: [[SEXTADDB:%.*]] = sext i32 [[ADDB]] to i64 |
| ; DISABLE-NEXT: call void @dummy(i64 [[SEXTADD]], i64 [[SEXTADDZA]], i64 [[SEXTADDB]]) |
| ; DISABLE-NEXT: ret void |
| ; |
| %ld = load i8, ptr %addr1 |
| ; %zextld is an operand of all three adds below. |
| %zextld = zext i8 %ld to i32 |
| %ld2 = load i32, ptr %addr2 |
| %add = add nsw i32 %ld2, %zextld |
| %sextadd = sext i32 %add to i64 |
| %zexta = zext i8 %a to i32 |
| %addza = add nsw i32 %zexta, %zextld |
| %sextaddza = sext i32 %addza to i64 |
| %addb = add nsw i32 %b, %zextld |
| %sextaddb = sext i32 %addb to i64 |
| call void @dummy(i64 %sextadd, i64 %sextaddza, i64 %sextaddb) |
| ret void |
| } |
| |
| ; External callee that receives the three i64 sums from @severalPromotions. |
| declare void @dummy(i64, i64, i64) |
| |
| ; Make sure we do not try to promote vector types since the type promotion |
| ; helper does not support them for now. |
| ; Every run configuration expects the zext to keep operating on the |
| ; <2 x i32> result of the shl. |
define void @vectorPromotion() { |
| ; OPTALL-LABEL: define void @vectorPromotion() { |
| ; OPTALL-NEXT: [[ENTRY:.*:]] |
| ; OPTALL-NEXT: [[A:%.*]] = shl nuw nsw <2 x i32> zeroinitializer, splat (i32 8) |
| ; OPTALL-NEXT: [[B:%.*]] = zext <2 x i32> [[A]] to <2 x i64> |
| ; OPTALL-NEXT: ret void |
| ; |
| entry: |
| %a = shl nuw nsw <2 x i32> zeroinitializer, <i32 8, i32 8> |
| ; Vector extension whose operand is the shl above. |
| %b = zext <2 x i32> %a to <2 x i64> |
| ret void |
| } |
| |
| ; Globals whose addresses are compared in @promotionOfArgEndsUpInValue. |
| @a = common global i32 0, align 4 |
| @c = common global [2 x i32] zeroinitializer, align 4 |
| |
| ; Make sure we support promotion of operands that produce a Value as opposed |
| ; to an instruction. |
| ; This used to cause a crash. |
| ; With promotion enabled, the add is expected to be done in i32 on a sext of |
| ; the load and a zext of %cmp from i1 to i32. |
define i32 @promotionOfArgEndsUpInValue(ptr %addr) { |
| ; OPT-LABEL: define i32 @promotionOfArgEndsUpInValue( |
| ; OPT-SAME: ptr [[ADDR:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2 |
| ; OPT-NEXT: [[PROMOTED:%.*]] = sext i16 [[VAL]] to i32 |
| ; OPT-NEXT: [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a |
| ; OPT-NEXT: [[PROMOTED2:%.*]] = zext i1 [[CMP]] to i32 |
| ; OPT-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PROMOTED]], [[PROMOTED2]] |
| ; OPT-NEXT: ret i32 [[ADD]] |
| ; |
| ; DISABLE-LABEL: define i32 @promotionOfArgEndsUpInValue( |
| ; DISABLE-SAME: ptr [[ADDR:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[VAL:%.*]] = load i16, ptr [[ADDR]], align 2 |
| ; DISABLE-NEXT: [[CMP:%.*]] = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a |
| ; DISABLE-NEXT: [[EXT:%.*]] = zext i1 [[CMP]] to i16 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[VAL]], [[EXT]] |
| ; DISABLE-NEXT: [[CONV3:%.*]] = sext i16 [[ADD]] to i32 |
| ; DISABLE-NEXT: ret i32 [[CONV3]] |
| ; |
| entry: |
| %val = load i16, ptr %addr |
| ; Both icmp operands are constants: a constant GEP into @c and @a itself. |
| %cmp = icmp ne ptr getelementptr inbounds ([2 x i32], ptr @c, i64 0, i64 1), @a |
| %ext = zext i1 %cmp to i16 |
| %add = add nuw nsw i16 %val, %ext |
| %conv3 = sext i16 %add to i32 |
| ret i32 %conv3 |
| } |
| |
| ; Check that we see that one zext can be derived from the other for free. |
| ; %zextt feeds both %add (stored as i32) and %add2 (zero-extended to i64). |
| ; With promotion enabled, %add2 is expected to be computed in i64 from a |
| ; second zext of %t, while %add keeps using the i32 zext. |
define void @promoteTwoArgZextWithSourceExtendedTwice(ptr %p, ptr %q, i32 %b, ptr %addr) { |
| ; OPT-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice( |
| ; OPT-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; OPT-NEXT: [[PROMOTED1:%.*]] = zext i8 [[T]] to i64 |
| ; OPT-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; OPT-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; OPT-NEXT: [[ADD2:%.*]] = add nuw i64 [[PROMOTED1]], 12 |
| ; OPT-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4 |
| ; OPT-NEXT: store i64 [[ADD2]], ptr [[Q]], align 8 |
| ; OPT-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteTwoArgZextWithSourceExtendedTwice( |
| ; DISABLE-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[ADD2:%.*]] = add nuw i32 [[ZEXTT]], 12 |
| ; DISABLE-NEXT: store i32 [[ADD]], ptr [[ADDR]], align 4 |
| ; DISABLE-NEXT: [[S:%.*]] = zext i32 [[ADD2]] to i64 |
| ; DISABLE-NEXT: store i64 [[S]], ptr [[Q]], align 8 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| ; First user of %zextt: stays in i32. |
| %add = add nuw i32 %zextt, %b |
| ; Second user of %zextt: its result is the operand of the zext to i64. |
| %add2 = add nuw i32 %zextt, 12 |
| store i32 %add, ptr %addr |
| %s = zext i32 %add2 to i64 |
| store i64 %s, ptr %q |
| ret void |
| } |
| |
| ; Check that we do not increase the cost of the code. |
| ; The input has one free zext and one free sext. If we would have promoted |
| ; all the way through the load we would end up with a free zext and a |
| ; non-free sext (of %b). |
| ; Only the run with -stress-cgp-ext-ld-promotion is expected to promote; it |
| ; then needs a trunc of the i64 sum to feed the i32 store. |
define void @doNotPromoteFreeSExtFromAddrMode(ptr %p, i32 %b, ptr %addr) { |
| ; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]] |
| ; NONSTRESS-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED3:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = sext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED3]], [[PROMOTED2]] |
| ; STRESS-NEXT: [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32 |
| ; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]] |
| ; STRESS-NEXT: store i32 [[PROMOTED]], ptr [[STADDR]], align 4 |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]] |
| ; DISABLE-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| ; %add is used twice: as the stored value and, sign-extended, as the index. |
| %add = add nsw i32 %zextt, %b |
| %idx64 = sext i32 %add to i64 |
| %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64 |
| store i32 %add, ptr %staddr |
| ret void |
| } |
| |
| ; Check that we do not increase the cost of the code. |
| ; The input has one free zext and one free sext. If we would have promoted |
| ; all the way through the load we would end up with a free zext and a |
| ; non-free sext (of %b). |
| ; Variant that stores the i64 argument %stuff through an i64-indexed address, |
| ; so the sext is the only user of %add. Only the run with |
| ; -stress-cgp-ext-ld-promotion is expected to promote. |
define void @doNotPromoteFreeSExtFromAddrMode64(ptr %p, i32 %b, ptr %addr, i64 %stuff) { |
| ; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]] |
| ; NONSTRESS-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8 |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[ADD]] |
| ; STRESS-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8 |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode64( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i64 [[STUFF:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i64, ptr [[ADDR]], i64 [[IDX64]] |
| ; DISABLE-NEXT: store i64 [[STUFF]], ptr [[STADDR]], align 8 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nsw i32 %zextt, %b |
| ; The sign-extended sum is only used as the index of the address below. |
| %idx64 = sext i32 %add to i64 |
| %staddr = getelementptr inbounds i64, ptr %addr, i64 %idx64 |
| store i64 %stuff, ptr %staddr |
| ret void |
| } |
| |
| ; Check that we do not increase the cost of the code. |
| ; The input has one free zext and one free sext. If we would have promoted |
| ; all the way through the load we would end up with a free zext and a |
| ; non-free sext (of %b). |
| ; Variant that stores the i128 argument %stuff through an address indexed in |
| ; units of i128. Only the run with -stress-cgp-ext-ld-promotion is expected |
| ; to promote. |
define void @doNotPromoteFreeSExtFromAddrMode128(ptr %p, i32 %b, ptr %addr, i128 %stuff) { |
| ; NONSTRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]] |
| ; NONSTRESS-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16 |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[ADD]] |
| ; STRESS-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16 |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @doNotPromoteFreeSExtFromAddrMode128( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i128 [[STUFF:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i128, ptr [[ADDR]], i64 [[IDX64]] |
| ; DISABLE-NEXT: store i128 [[STUFF]], ptr [[STADDR]], align 16 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nsw i32 %zextt, %b |
| ; The sign-extended sum is only used as the index of the address below. |
| %idx64 = sext i32 %add to i64 |
| %staddr = getelementptr inbounds i128, ptr %addr, i64 %idx64 |
| store i128 %stuff, ptr %staddr |
| ret void |
| } |
| |
| |
| ; Check that the sext is promoted when it feeds a GEP over i256 elements. |
| ; The input has the same shape as doNotPromoteFreeSExtFromAddrMode128, but |
| ; every run with promotion enabled (OPT) expects the sext to be promoted |
| ; through the add, leaving a zext of the load and a sext of %b. |
| ; NOTE(review): presumably the sext is not free here because an i256 scale |
| ; cannot be folded into the addressing mode -- confirm against the target. |
| ; With promotion disabled (DISABLE) the instruction sequence is kept as is. |
| define void @promoteSExtFromAddrMode256(ptr %p, i32 %b, ptr %addr, i256 %stuff) { |
| ; OPT-LABEL: define void @promoteSExtFromAddrMode256( |
| ; OPT-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; OPT-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64 |
| ; OPT-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64 |
| ; OPT-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]] |
| ; OPT-NEXT: [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[ADD]] |
| ; OPT-NEXT: store i256 [[STUFF]], ptr [[STADDR]], align 16 |
| ; OPT-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @promoteSExtFromAddrMode256( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]], i256 [[STUFF:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i256, ptr [[ADDR]], i64 [[IDX64]] |
| ; DISABLE-NEXT: store i256 [[STUFF]], ptr [[STADDR]], align 16 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nsw i32 %zextt, %b |
| %idx64 = sext i32 %add to i64 |
| %staddr = getelementptr inbounds i256, ptr %addr, i64 %idx64 |
| store i256 %stuff, ptr %staddr |
| ret void |
| } |
| |
| ; Check that we do not increase the cost of the code. |
| ; The input has two free zexts (of %t and of %add). |
| ; If we promoted all the way through to the load, we would end up with |
| ; a free zext and a non-free zext (of %b). |
| ; NOTE(review): this comment used to say that, because the current target |
| ; lowering reports zext i32 to i64 as free, the promotion still happens in |
| ; the default mode (the cost does not change and it may expose more |
| ; opportunities), and that this would need to be fixed at some point. |
| ; The NONSTRESS checks below expect no promotion, so that remark looks |
| ; stale -- confirm. |
| ; |
| ; The checks expect this transformation only in stress mode. %add is also |
| ; the stored value, so the STRESS output truncates the promoted add back to |
| ; i32 for the store. |
| define void @doNotPromoteFreeZExtFromAddrMode(ptr %p, i32 %b, ptr %addr) { |
| ; NONSTRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = zext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]] |
| ; NONSTRESS-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4 |
| ; NONSTRESS-NEXT: ret void |
| ; |
| ; STRESS-LABEL: define void @doNotPromoteFreeZExtFromAddrMode( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED3:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nuw i64 [[PROMOTED3]], [[PROMOTED2]] |
| ; STRESS-NEXT: [[PROMOTED:%.*]] = trunc i64 [[ADD]] to i32 |
| ; STRESS-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[ADD]] |
| ; STRESS-NEXT: store i32 [[PROMOTED]], ptr [[STADDR]], align 4 |
| ; STRESS-NEXT: ret void |
| ; |
| ; DISABLE-LABEL: define void @doNotPromoteFreeZExtFromAddrMode( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]], ptr [[ADDR:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = zext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = getelementptr inbounds i32, ptr [[ADDR]], i64 [[IDX64]] |
| ; DISABLE-NEXT: store i32 [[ADD]], ptr [[STADDR]], align 4 |
| ; DISABLE-NEXT: ret void |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nuw i32 %zextt, %b |
| %idx64 = zext i32 %add to i64 |
| %staddr = getelementptr inbounds i32, ptr %addr, i64 %idx64 |
| store i32 %add, ptr %staddr |
| ret void |
| } |
| |
| ; Same load/zext/add nsw/sext chain as the AddrMode tests, but the sext |
| ; feeds a shl by 12 instead of a GEP index. |
| ; NONSTRESS and DISABLE expect the instruction sequence to be kept as is; |
| ; only STRESS expects the sext to be promoted through the add. |
| define i64 @doNotPromoteFreeSExtFromShift(ptr %p, i32 %b) { |
| ; NONSTRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = shl i64 [[IDX64]], 12 |
| ; NONSTRESS-NEXT: ret i64 [[STADDR]] |
| ; |
| ; STRESS-LABEL: define i64 @doNotPromoteFreeSExtFromShift( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = sext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nsw i64 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[STADDR:%.*]] = shl i64 [[ADD]], 12 |
| ; STRESS-NEXT: ret i64 [[STADDR]] |
| ; |
| ; DISABLE-LABEL: define i64 @doNotPromoteFreeSExtFromShift( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nsw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = sext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = shl i64 [[IDX64]], 12 |
| ; DISABLE-NEXT: ret i64 [[STADDR]] |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nsw i32 %zextt, %b |
| %idx64 = sext i32 %add to i64 |
| %staddr = shl i64 %idx64, 12 |
| ret i64 %staddr |
| } |
| |
| ; Same comment as doNotPromoteFreeZExtFromAddrMode, except that the outer |
| ; zext feeds a shl by 12 instead of a GEP index. |
| ; |
| ; The checks expect this transformation only in stress mode (STRESS); |
| ; NONSTRESS and DISABLE expect the instruction sequence to be kept as is. |
| define i64 @doNotPromoteFreeZExtFromShift(ptr %p, i32 %b) { |
| ; NONSTRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift( |
| ; NONSTRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; NONSTRESS-NEXT: [[ENTRY:.*:]] |
| ; NONSTRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; NONSTRESS-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; NONSTRESS-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; NONSTRESS-NEXT: [[IDX64:%.*]] = zext i32 [[ADD]] to i64 |
| ; NONSTRESS-NEXT: [[STADDR:%.*]] = shl i64 [[IDX64]], 12 |
| ; NONSTRESS-NEXT: ret i64 [[STADDR]] |
| ; |
| ; STRESS-LABEL: define i64 @doNotPromoteFreeZExtFromShift( |
| ; STRESS-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; STRESS-NEXT: [[ENTRY:.*:]] |
| ; STRESS-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; STRESS-NEXT: [[PROMOTED2:%.*]] = zext i8 [[T]] to i64 |
| ; STRESS-NEXT: [[PROMOTED1:%.*]] = zext i32 [[B]] to i64 |
| ; STRESS-NEXT: [[ADD:%.*]] = add nuw i64 [[PROMOTED2]], [[PROMOTED1]] |
| ; STRESS-NEXT: [[STADDR:%.*]] = shl i64 [[ADD]], 12 |
| ; STRESS-NEXT: ret i64 [[STADDR]] |
| ; |
| ; DISABLE-LABEL: define i64 @doNotPromoteFreeZExtFromShift( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[B:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[T:%.*]] = load i8, ptr [[P]], align 1 |
| ; DISABLE-NEXT: [[ZEXTT:%.*]] = zext i8 [[T]] to i32 |
| ; DISABLE-NEXT: [[ADD:%.*]] = add nuw i32 [[ZEXTT]], [[B]] |
| ; DISABLE-NEXT: [[IDX64:%.*]] = zext i32 [[ADD]] to i64 |
| ; DISABLE-NEXT: [[STADDR:%.*]] = shl i64 [[IDX64]], 12 |
| ; DISABLE-NEXT: ret i64 [[STADDR]] |
| ; |
| entry: |
| %t = load i8, ptr %p |
| %zextt = zext i8 %t to i32 |
| %add = add nuw i32 %zextt, %b |
| %idx64 = zext i32 %add to i64 |
| %staddr = shl i64 %idx64, 12 |
| ret i64 %staddr |
| } |
| |
| ; The input has one free zext and one non-free sext. |
| ; When we promote all the way through to the load, we end up with |
| ; a free zext, a free sext (%ld1), and a non-free sext (of %cst). |
| ; However, when we generate a load pair, the free sext (%ld1) becomes |
| ; non-free. So technically, we trade one non-free sext for two non-free |
| ; sexts. |
| ; This would need to be fixed at some point. |
| ; |
| ; This transformation should really happen only for stress mode. |
| ; The OPT checks below, shared by the default and stress runs, currently |
| ; expect the promotion; DISABLE expects the instruction sequence kept as is. |
| define i64 @doNotPromoteBecauseOfPairedLoad(ptr %p, i32 %cst) { |
| ; OPT-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad( |
| ; OPT-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) { |
| ; OPT-NEXT: [[LD0:%.*]] = load i32, ptr [[P]], align 4 |
| ; OPT-NEXT: [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1 |
| ; OPT-NEXT: [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4 |
| ; OPT-NEXT: [[PROMOTED:%.*]] = sext i32 [[LD1]] to i64 |
| ; OPT-NEXT: [[PROMOTED1:%.*]] = sext i32 [[CST]] to i64 |
| ; OPT-NEXT: [[RES:%.*]] = add nsw i64 [[PROMOTED]], [[PROMOTED1]] |
| ; OPT-NEXT: [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64 |
| ; OPT-NEXT: [[FINAL:%.*]] = add i64 [[RES]], [[ZEXTLD0]] |
| ; OPT-NEXT: ret i64 [[FINAL]] |
| ; |
| ; DISABLE-LABEL: define i64 @doNotPromoteBecauseOfPairedLoad( |
| ; DISABLE-SAME: ptr [[P:%.*]], i32 [[CST:%.*]]) { |
| ; DISABLE-NEXT: [[LD0:%.*]] = load i32, ptr [[P]], align 4 |
| ; DISABLE-NEXT: [[IDXLD1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1 |
| ; DISABLE-NEXT: [[LD1:%.*]] = load i32, ptr [[IDXLD1]], align 4 |
| ; DISABLE-NEXT: [[RES:%.*]] = add nsw i32 [[LD1]], [[CST]] |
| ; DISABLE-NEXT: [[SEXTRES:%.*]] = sext i32 [[RES]] to i64 |
| ; DISABLE-NEXT: [[ZEXTLD0:%.*]] = zext i32 [[LD0]] to i64 |
| ; DISABLE-NEXT: [[FINAL:%.*]] = add i64 [[SEXTRES]], [[ZEXTLD0]] |
| ; DISABLE-NEXT: ret i64 [[FINAL]] |
| ; |
| %ld0 = load i32, ptr %p |
| %idxLd1 = getelementptr inbounds i32, ptr %p, i64 1 |
| %ld1 = load i32, ptr %idxLd1 |
| %res = add nsw i32 %ld1, %cst |
| %sextres = sext i32 %res to i64 |
| %zextLd0 = zext i32 %ld0 to i64 |
| %final = add i64 %sextres, %zextLd0 |
| ret i64 %final |
| } |
| |
| ; The load is in the entry block while its zext / shl nsw / sext chain is in |
| ; if.then. |
| ; With promotion enabled (OPT), the checks expect a single zext i16 to i64 |
| ; placed next to the load in the entry block and a 64-bit shl nsw in if.then, |
| ; with no sext left. |
| ; With promotion disabled (DISABLE), only the zext to i32 is moved next to the |
| ; load; the 32-bit shl and the sext stay in if.then. |
| define i64 @promoteZextShl(i1 %c, ptr %P) { |
| ; OPT-LABEL: define i64 @promoteZextShl( |
| ; OPT-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { |
| ; OPT-NEXT: [[ENTRY:.*:]] |
| ; OPT-NEXT: [[LD:%.*]] = load i16, ptr [[P]], align 2 |
| ; OPT-NEXT: [[PROMOTED1:%.*]] = zext i16 [[LD]] to i64 |
| ; OPT-NEXT: br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]] |
| ; OPT: [[IF_THEN]]: |
| ; OPT-NEXT: [[SHL2:%.*]] = shl nsw i64 [[PROMOTED1]], 1 |
| ; OPT-NEXT: ret i64 [[SHL2]] |
| ; OPT: [[END]]: |
| ; OPT-NEXT: ret i64 0 |
| ; |
| ; DISABLE-LABEL: define i64 @promoteZextShl( |
| ; DISABLE-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) { |
| ; DISABLE-NEXT: [[ENTRY:.*:]] |
| ; DISABLE-NEXT: [[LD:%.*]] = load i16, ptr [[P]], align 2 |
| ; DISABLE-NEXT: [[Z:%.*]] = zext i16 [[LD]] to i32 |
| ; DISABLE-NEXT: br i1 [[C]], label %[[END:.*]], label %[[IF_THEN:.*]] |
| ; DISABLE: [[IF_THEN]]: |
| ; DISABLE-NEXT: [[SHL2:%.*]] = shl nsw i32 [[Z]], 1 |
| ; DISABLE-NEXT: [[R:%.*]] = sext i32 [[SHL2]] to i64 |
| ; DISABLE-NEXT: ret i64 [[R]] |
| ; DISABLE: [[END]]: |
| ; DISABLE-NEXT: ret i64 0 |
| ; |
| entry: |
| %ld = load i16, ptr %P |
| br i1 %c, label %end, label %if.then |
| if.then: |
| %z = zext i16 %ld to i32 |
| %shl2 = shl nsw i32 %z, 1 |
| %r = sext i32 %shl2 to i64 |
| ret i64 %r |
| end: |
| ret i64 0 |
| } |