| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: opt < %s -passes=aggressive-instcombine,instcombine -S | FileCheck %s |
| |
| ; https://alive2.llvm.org/ce/z/KuJPnU |
| ; Positive test: the high 64 bits of an unsigned 64x64-bit multiply, spelled out |
| ; as four 32x32-bit partial products with manual carry handling. The assertions |
| ; below expect the whole sequence to collapse to zext + mul i128 + lshr 64 + |
| ; trunc. |
| define i64 @umulh(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw i128 [[TMP5]] to i64 |
| ; CHECK-NEXT: ret i64 [[TMP4]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| ; No nuw here: the sum of the two cross products may wrap, which is what the |
| ; carry check below detects. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| ; The sum wrapped iff it is unsigned-less-than one of its addends. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 1 << 32, so this add cannot wrap (hence nuw nsw). |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 ; carry out of the low word |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Commutative ops should match in any order. Ops whose operand order has been |
| ; reversed relative to @umulh above are marked 'commuted'. As per the InstCombine |
| ; contributor guide, constants are always canonicalized to the RHS, so there is |
| ; no need to commute constants. |
| ; Positive test: same computation as @umulh, but with the operands of several |
| ; mul/add instructions swapped (each such line carries a 'commuted' remark). |
| ; The assertions below expect the same zext + mul i128 + lshr 64 + trunc |
| ; result. |
| define i64 @umulh__commuted(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__commuted( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw i128 [[TMP5]] to i64 |
| ; CHECK-NEXT: ret i64 [[TMP4]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %x_hi, %y_lo ; commuted |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %x_lo, %y_hi ; commuted |
| %y_lo_x_lo = mul nuw i64 %x_lo, %y_lo ; commuted |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_lo_x_hi, %y_hi_x_lo ; commuted |
| |
| ; Carry if overflowed |
| ; The sum wrapped iff it is unsigned-less-than one of its addends. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %y_lo_x_lo_hi, %cross_sum_lo ; commuted |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %y_hi_x_hi, %cross_sum_hi ; commuted |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %carry, %intermediate ; commuted |
| %hw64 = add i64 %low_accum_hi, %intermediate_plus_carry ; commuted |
| |
| ret i64 %hw64 |
| } |
| |
| ; Positive test: the same high-half multiply pattern as the i64 tests, at i32 |
| ; width with 16-bit halves. The assertions below expect zext to i64 + mul + |
| ; lshr 32 + trunc. |
| define i32 @mulh_src32(i32 %x, i32 %y) { |
| ; Extract low and high 16 bits |
| ; CHECK-LABEL: define i32 @mulh_src32( |
| ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[X]] to i64 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[Y]] to i64 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 32 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw i64 [[TMP4]] to i32 |
| ; CHECK-NEXT: ret i32 [[TMP5]] |
| ; |
| %x_lo = and i32 %x, u0xffff ; x & 0xffff |
| %y_lo = and i32 %y, u0xffff ; y & 0xffff |
| %x_hi = lshr i32 %x, 16 ; x >> 16 |
| %y_hi = lshr i32 %y, 16 ; y >> 16 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i32 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i32 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i32 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i32 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i32 %y_hi_x_lo, %y_lo_x_hi ; full 32-bit sum (may wrap) |
| |
| ; Carry if overflowed |
| ; The sum wrapped iff it is unsigned-less-than one of its addends. |
| %carry_out = icmp ult i32 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i32 u0x10000, i32 0 ; if overflow, add 1 << 16 |
| |
| ; High 16 bits of low product |
| %y_lo_x_lo_hi = lshr i32 %y_lo_x_lo, 16 |
| |
| ; Low and high 16 bits of cross_sum |
| %cross_sum_lo = and i32 %cross_sum, u0xffff |
| %cross_sum_hi = lshr i32 %cross_sum, 16 |
| |
| %low_accum = add nuw nsw i32 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| ; Note: %hw64 keeps its name from the i64 tests; here it is the high 32 bits. |
| %intermediate = add nuw i32 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i32 %low_accum, 16 |
| %intermediate_plus_carry = add i32 %intermediate, %carry |
| %hw64 = add i32 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i32 %hw64 |
| } |
| |
| ; Positive test: the same high-half multiply pattern as the i64 tests, at i128 |
| ; width with 64-bit halves. The assertions below expect zext to i256 + mul + |
| ; lshr 128 + trunc. |
| define i128 @mulh_src128(i128 %x, i128 %y) { |
| ; Extract low and high 64 bits |
| ; CHECK-LABEL: define i128 @mulh_src128( |
| ; CHECK-SAME: i128 [[X:%.*]], i128 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i128 [[X]] to i256 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i128 [[Y]] to i256 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i256 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i256 [[TMP3]], 128 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i256 [[TMP4]] to i128 |
| ; CHECK-NEXT: ret i128 [[HW64]] |
| ; |
| %x_lo = and i128 %x, u0xffffffffffffffff ; x & 0xffffffffffffffff |
| %y_lo = and i128 %y, u0xffffffffffffffff ; y & 0xffffffffffffffff |
| %x_hi = lshr i128 %x, 64 ; x >> 64 |
| %y_hi = lshr i128 %y, 64 ; y >> 64 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i128 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i128 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i128 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i128 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i128 %y_hi_x_lo, %y_lo_x_hi ; full 128-bit sum (may wrap) |
| |
| ; Carry if overflowed |
| ; The sum wrapped iff it is unsigned-less-than one of its addends. |
| %carry_out = icmp ult i128 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i128 u0x10000000000000000, i128 0 ; if overflow, add 1 << 64 |
| |
| ; High 64 bits of low product |
| %y_lo_x_lo_hi = lshr i128 %y_lo_x_lo, 64 |
| |
| ; Low and high 64 bits of cross_sum |
| %cross_sum_lo = and i128 %cross_sum, u0xffffffffffffffff |
| %cross_sum_hi = lshr i128 %cross_sum, 64 |
| |
| %low_accum = add nuw nsw i128 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| ; Note: %hw64 keeps its name from the i64 tests; here it is the high 128 bits. |
| %intermediate = add nuw i128 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i128 %low_accum, 64 |
| %intermediate_plus_carry = add i128 %intermediate, %carry |
| %hw64 = add i128 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i128 %hw64 |
| } |
| |
| ; Positive test: vector form of the i32 high-half multiply pattern, using |
| ; <2 x i32> with splat constants. The assertions below expect zext to |
| ; <2 x i64> + mul + lshr by splat 32 + trunc. |
| define <2 x i32> @mulh_v2i32(<2 x i32> %x, <2 x i32> %y) { |
| ; Extract low and high 16 bits |
| ; CHECK-LABEL: define <2 x i32> @mulh_v2i32( |
| ; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[X]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[Y]] to <2 x i64> |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw <2 x i64> [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 32) |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw <2 x i64> [[TMP4]] to <2 x i32> |
| ; CHECK-NEXT: ret <2 x i32> [[HW64]] |
| ; |
| %x_lo = and <2 x i32> %x, <i32 u0xffff, i32 u0xffff> |
| %y_lo = and <2 x i32> %y, <i32 u0xffff, i32 u0xffff> |
| %x_hi = lshr <2 x i32> %x, <i32 16, i32 16> |
| %y_hi = lshr <2 x i32> %y, <i32 16, i32 16> |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw <2 x i32> %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw <2 x i32> %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw <2 x i32> %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw <2 x i32> %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add <2 x i32> %y_hi_x_lo, %y_lo_x_hi ; full 32-bit sum per lane (may wrap) |
| |
| ; Carry if overflowed |
| ; Per lane: add 1 << 16 when the cross sum wrapped. |
| %carry_out = icmp ult <2 x i32> %cross_sum, %y_lo_x_hi |
| %carry = select <2 x i1> %carry_out, <2 x i32> <i32 u0x10000, i32 u0x10000>, <2 x i32> <i32 0, i32 0> |
| |
| ; High 16 bits of low product |
| %y_lo_x_lo_hi = lshr <2 x i32> %y_lo_x_lo, <i32 16, i32 16> |
| |
| ; Low and high 16 bits of cross_sum |
| %cross_sum_lo = and <2 x i32> %cross_sum, <i32 u0xffff, i32 u0xffff> |
| %cross_sum_hi = lshr <2 x i32> %cross_sum, <i32 16, i32 16> |
| |
| %low_accum = add nuw nsw <2 x i32> %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| ; Note: %hw64 keeps its name from the i64 tests; here it is the high 32 bits |
| ; of each lane. |
| %intermediate = add nuw <2 x i32> %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr <2 x i32> %low_accum, <i32 16, i32 16> |
| %intermediate_plus_carry = add <2 x i32> %intermediate, %carry |
| %hw64 = add <2 x i32> %intermediate_plus_carry, %low_accum_hi |
| |
| ret <2 x i32> %hw64 |
| } |
| |
| ; https://alive2.llvm.org/ce/z/PPXtkR |
| ; Positive test: full 64x64 -> 128-bit multiply. The high 64 bits are stored to |
| ; %p + 8 and the low 64 bits to %p. The assertions below expect the high half to |
| ; become zext + mul i128 + lshr 64 + trunc and the low half to become a plain |
| ; mul i64 of %x and %y. |
| define void @full_mul_int128(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc nuw i128 [[TMP5]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: store i64 [[TMP8]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| ; The sum wrapped iff it is unsigned-less-than one of its addends. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| ; Unlike @umulh, the carry is added to y_hi*x_hi first and the high half of |
| ; cross_sum second. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store high 64 bits |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Reconstruct low 64 bits |
| ; low 32 bits of low_accum go in the upper word, low 32 bits of lo*lo in the |
| ; lower word; the two ranges do not overlap, hence 'or disjoint'. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store low 64 bits |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| |
| ; Negative tests |
| |
| ; Negative test: %x is masked with 0xfffffffe instead of 0xffffffff, so %x_lo |
| ; is not the low 32 bits of %x. The assertions below expect the instruction |
| ; sequence to be left as written. |
| define i64 @umulh_notandx(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notandx( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967294 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967294 ; x & 0xfffffffe (deliberately wrong mask) |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: %y is masked with 0xfffffffe instead of 0xffffffff, so %y_lo |
| ; is not the low 32 bits of %y. The assertions below expect the instruction |
| ; sequence to be left as written. |
| define i64 @umulh_notandy(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notandy( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967294 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967294 ; y & 0xfffffffe (deliberately wrong mask) |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: %x is shifted right by 16 instead of 32, so %x_hi is not the |
| ; high 32 bits of %x. The assertions below expect the instruction sequence to |
| ; be left as written. |
| define i64 @umulh_notshiftx(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notshiftx( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 16 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 16 ; x >> 16 (deliberately wrong shift) |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: %y is shifted right by 16 instead of 32, so %y_hi is not the |
| ; high 32 bits of %y. The assertions below expect the instruction sequence to |
| ; be left as written. |
| define i64 @umulh_notshifty(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notshifty( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 16 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 16 ; y >> 16 (deliberately wrong shift) |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: the carry select yields 4294967295 (0xffffffff) instead of |
| ; 4294967296 (1 << 32). The assertions below expect the instruction sequence |
| ; to be left as written. |
| define i64 @umulh_notcarry(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notcarry( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967295, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967295, i64 0 ; if overflow, add wrong value (0xffffffff, not 1 << 32) |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: the low partial product multiplies %y_lo by the full %x rather |
| ; than by %x_lo. The assertions below expect the instruction sequence to be |
| ; left as written. |
| define i64 @umulh_notxlo(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notxlo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x ; y_lo * x (deliberately not x_lo) |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; Negative test: %cross_sum adds y_hi*x_lo to itself instead of adding the two |
| ; different cross products. The assertions below expect the sequence to stay |
| ; decomposed; the self-add shows up as a shl by 1 and the low-half mask as |
| ; 4294967294. |
| define i64 @umulh_notcrosssum(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh_notcrosssum( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = shl i64 [[Y_HI_X_LO]], 1 |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967294 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_hi_x_lo ; wrong crosssum (same operand twice) |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| |
| |
| ; Uses tests. |
| |
| ; 'x_lo' can have more than 2 uses. |
| define i64 @umulh__mul_use__x_lo(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[X_LO]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| ; Extra use of x_lo outside the multiply pattern. The expected output above |
| ; keeps x_lo for that use and still forms the widened i128 multiply. |
| call void (...) @llvm.fake.use(i64 %x_lo) |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_hi' can have more than 2 uses. |
| define i64 @umulh__mul_use__y_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| ; Extra use of y_hi outside the multiply pattern. The expected output above |
| ; keeps y_hi for that use and still forms the widened i128 multiply. |
| call void (...) @llvm.fake.use(i64 %y_hi) |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_lo * x_hi' must have no more than 2 uses. |
| define i64 @umulh__mul_use__y_lo_x_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_lo_x_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_HI]]) |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| ; Extra use of y_lo_x_hi outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %y_lo_x_hi) |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_hi * x_hi' must have single use. |
| define i64 @umulh__mul_use__y_hi_x_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_hi_x_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI_X_HI]]) |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| ; Extra use of y_hi_x_hi outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %y_hi_x_hi) |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_hi * x_lo' must have single use. |
| define i64 @umulh__mul_use__y_hi_x_lo(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_hi_x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI_X_LO]]) |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| ; Extra use of y_hi_x_lo outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %y_hi_x_lo) |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_lo * x_lo' has a single use if only doing high part of multiply and 2 uses |
| ; when doing both low/high parts. Doing the optimization when only doing the |
| ; high part and there's a 2nd unrelated use here still results in fewer |
| ; instructions and is likely profitable, so this seems ok. |
| define i64 @umulh__mul_use__y_lo_x_lo(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_lo_x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_LO]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: ret i64 [[TMP5]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| ; Extra use of y_lo_x_lo outside the multiply pattern. The expected output |
| ; above keeps x_lo, y_lo and y_lo_x_lo for that use and still forms the |
| ; widened i128 multiply. |
| call void (...) @llvm.fake.use(i64 %y_lo_x_lo) |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'cross_sum' must have no more than 3 uses. |
| define i64 @umulh__mul_use__cross_sum(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__cross_sum( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM]]) |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Extra (fourth) use of cross_sum, in addition to carry_out, cross_sum_lo and |
| ; cross_sum_hi below. The expected output above keeps the original sequence; |
| ; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %cross_sum) |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'carry_out' must have single use. |
| define i64 @umulh__mul_use__carry_out(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__carry_out( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i1 [[CARRY_OUT]]) |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| ; Extra use of carry_out outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i1 %carry_out) |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'carry' must have single use. |
| define i64 @umulh__mul_use__carry(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__carry( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CARRY]]) |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| ; Extra use of carry outside the multiply pattern. The expected output above |
| ; keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %carry) |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'y_lo_x_lo_hi' must have single use. |
| define i64 @umulh__mul_use__y_lo_x_lo_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__y_lo_x_lo_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_LO_HI]]) |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| ; Extra use of y_lo_x_lo_hi outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %y_lo_x_lo_hi) |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'cross_sum_lo' must have single use. |
| define i64 @umulh__mul_use__cross_sum_lo(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__cross_sum_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM_LO]]) |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| ; Extra use of cross_sum_lo outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %cross_sum_lo) |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'cross_sum_hi' must have single use. |
| define i64 @umulh__mul_use__cross_sum_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__cross_sum_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM_HI]]) |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Extract low and high 32 bits |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Cross products |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Add cross terms |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; Carry if overflowed |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; High 32 bits of low product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| ; Extra use of cross_sum_hi outside the multiply pattern. The expected output |
| ; above keeps the original sequence; no widened i128 multiply is formed. |
| call void (...) @llvm.fake.use(i64 %cross_sum_hi) |
| |
| ; Both addends are below 2^32 (one masked, one shifted right by 32), so this |
| ; add cannot wrap. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Final result accumulation |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'low_accum' has a single use when only the high half of the product is |
| ; computed and two uses when both halves are computed. The extra use here is |
| ; unrelated to either half, but folding the high half still seems profitable. |
| define i64 @umulh__mul_use__low_accum(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__low_accum( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul i64 [[Y]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul i64 [[Y_HI]], [[X]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[LOW_ACCUM]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: ret i64 [[TMP5]] |
| ; |
| ; Split each 64-bit operand into its low and high 32-bit halves |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Four 32x32 partial products; each fits in 64 bits, hence nuw |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Sum of the two middle partial products; this add may wrap (no nuw) |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; The sum wrapped iff it is unsigned-less-than one of its addends |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; Upper 32 bits of the lo*lo partial product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32-bit halves of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| call void (...) @llvm.fake.use(i64 %low_accum) |
| |
| ; High word = y_hi*x_hi + (cross_sum >> 32) + carry + (low_accum >> 32) |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'intermediate' must have a single use (the %intermediate_plus_carry add): a second use through llvm.fake.use leaves the expected output unfolded. |
| define i64 @umulh__mul_use__intermediate(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__intermediate( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[INTERMEDIATE]]) |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Split each 64-bit operand into its low and high 32-bit halves |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Four 32x32 partial products; each fits in 64 bits, hence nuw |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Sum of the two middle partial products; this add may wrap (no nuw) |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; The sum wrapped iff it is unsigned-less-than one of its addends |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; Upper 32 bits of the lo*lo partial product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32-bit halves of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High word = y_hi*x_hi + (cross_sum >> 32) + carry + (low_accum >> 32); intermediate gets an extra user here |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| call void (...) @llvm.fake.use(i64 %intermediate) |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'low_accum_hi' must have a single use (the %hw64 add): a second use through llvm.fake.use leaves the expected output unfolded. |
| define i64 @umulh__mul_use__low_accum_hi(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__low_accum_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[LOW_ACCUM_HI]]) |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Split each 64-bit operand into its low and high 32-bit halves |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Four 32x32 partial products; each fits in 64 bits, hence nuw |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Sum of the two middle partial products; this add may wrap (no nuw) |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; The sum wrapped iff it is unsigned-less-than one of its addends |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; Upper 32 bits of the lo*lo partial product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32-bit halves of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High word = y_hi*x_hi + (cross_sum >> 32) + carry + (low_accum >> 32); low_accum_hi gets an extra user here |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| call void (...) @llvm.fake.use(i64 %low_accum_hi) |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| |
| ret i64 %hw64 |
| } |
| |
| ; 'intermediate_plus_carry' must have a single use (the %hw64 add): a second use through llvm.fake.use leaves the expected output unfolded. |
| define i64 @umulh__mul_use__intermediate_plus_carry(i64 %x, i64 %y) { |
| ; CHECK-LABEL: define i64 @umulh__mul_use__intermediate_plus_carry( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[INTERMEDIATE:%.*]] = add nuw i64 [[CROSS_SUM_HI]], [[Y_HI_X_HI]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[INTERMEDIATE_PLUS_CARRY:%.*]] = add i64 [[INTERMEDIATE]], [[CARRY]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[INTERMEDIATE_PLUS_CARRY]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[INTERMEDIATE_PLUS_CARRY]]) |
| ; CHECK-NEXT: ret i64 [[HW64]] |
| ; |
| ; Split each 64-bit operand into its low and high 32-bit halves |
| %x_lo = and i64 %x, 4294967295 ; x & 0xffffffff |
| %y_lo = and i64 %y, 4294967295 ; y & 0xffffffff |
| %x_hi = lshr i64 %x, 32 ; x >> 32 |
| %y_hi = lshr i64 %y, 32 ; y >> 32 |
| |
| ; Four 32x32 partial products; each fits in 64 bits, hence nuw |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi ; y_lo * x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi ; y_hi * x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo ; y_hi * x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo ; y_lo * x_lo |
| |
| ; Sum of the two middle partial products; this add may wrap (no nuw) |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi ; full 64-bit sum |
| |
| ; The sum wrapped iff it is unsigned-less-than one of its addends |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 ; if overflow, add 1 << 32 |
| |
| ; Upper 32 bits of the lo*lo partial product |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32-bit halves of cross_sum |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High word = y_hi*x_hi + (cross_sum >> 32) + carry + (low_accum >> 32); intermediate_plus_carry gets an extra user here |
| %intermediate = add nuw i64 %cross_sum_hi, %y_hi_x_hi |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %intermediate_plus_carry = add i64 %intermediate, %carry |
| %hw64 = add i64 %intermediate_plus_carry, %low_accum_hi |
| call void (...) @llvm.fake.use(i64 %intermediate_plus_carry) |
| |
| ret i64 %hw64 |
| } |
| |
| |
| ; 'x_lo' can have multiple uses: despite the extra llvm.fake.use of %x_lo, the expected output computes the high word from an i128 multiply and the low word from a plain i64 multiply. |
| define void @full_mul_int128__mul_use__x_lo(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[X_LO]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LW64:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| call void (...) @llvm.fake.use(i64 %x_lo) |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_lo' can have multiple uses: despite the extra llvm.fake.use of %y_lo, the expected output computes the high word from an i128 multiply and the low word from a plain i64 multiply. |
| define void @full_mul_int128__mul_use__y_lo(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LW64:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| call void (...) @llvm.fake.use(i64 %y_lo) |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'x_hi' can have multiple uses: despite the extra llvm.fake.use of %x_hi, the expected output computes the high word from an i128 multiply and the low word from a plain i64 multiply. |
| define void @full_mul_int128__mul_use__x_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__x_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[X_HI]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LW64:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| call void (...) @llvm.fake.use(i64 %x_hi) |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_hi' can have multiple uses: despite the extra llvm.fake.use of %y_hi, the expected output computes the high word from an i128 multiply and the low word from a plain i64 multiply. |
| define void @full_mul_int128__mul_use__y_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI]]) |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[HW64:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LW64:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| call void (...) @llvm.fake.use(i64 %y_hi) |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_lo_x_hi' must have exactly 2 uses (the %cross_sum add and the %carry_out compare): a third use through llvm.fake.use leaves the expected output unfolded. |
| define void @full_mul_int128__mul_use__y_lo_x_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_lo_x_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_HI]]) |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| call void (...) @llvm.fake.use(i64 %y_lo_x_hi) |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_hi_x_hi' must have a single use (the %upper_mid add): a second use through llvm.fake.use leaves the expected output unfolded. |
| define void @full_mul_int128__mul_use__y_hi_x_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_hi_x_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI_X_HI]]) |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| call void (...) @llvm.fake.use(i64 %y_hi_x_hi) |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_hi_x_lo' must have a single use (the %cross_sum add): a second use through llvm.fake.use leaves the expected output unfolded. |
| define void @full_mul_int128__mul_use__y_hi_x_lo(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_hi_x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_HI_X_LO]]) |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| call void (...) @llvm.fake.use(i64 %y_hi_x_lo) |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_lo_x_lo' is allowed to have multiple uses. |
| ; TODO: The low 64 bits do not simplify to a single multiply like they should? |
| ; Builds the 128-bit product of %x and %y from four 32x32 partial products and |
| ; stores the high word at %p+8 and the low word at %p. %y_lo_x_lo has an extra |
| ; llvm.fake.use. The expected output still replaces the high word with the |
| ; upper 64 bits of a zero-extended i128 multiply; the low word is still |
| ; assembled from separate i64 multiplies. |
| define void @full_mul_int128__mul_use__y_lo_x_lo(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_lo_x_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = mul i64 [[Y]], [[X_HI]] |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = mul i64 [[Y_HI]], [[X]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_LO]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[TMP5]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM1:%.*]] = shl i64 [[TMP6]], 32 |
| ; CHECK-NEXT: [[LW64:%.*]] = add i64 [[Y_LO_X_LO]], [[LOW_ACCUM1]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| ; Extra use of the low partial product (the condition under test). |
| call void (...) @llvm.fake.use(i64 %y_lo_x_lo) |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'cross_sum' must have no more than 3 uses. |
| ; Negative test. %cross_sum is used by the icmp, the and, and the lshr, plus one |
| ; extra llvm.fake.use (four uses in total). The expected output keeps the |
| ; hand-written expansion; no i128 multiply is formed. |
| define void @full_mul_int128__mul_use__cross_sum(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__cross_sum( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM]]) |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| ; Extra use of cross_sum (the condition under test). |
| call void (...) @llvm.fake.use(i64 %cross_sum) |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'carry_out' must have single use. |
| ; Negative test. %carry_out feeds the select and also has an extra |
| ; llvm.fake.use. The expected output keeps the hand-written expansion; no i128 |
| ; multiply is formed. |
| define void @full_mul_int128__mul_use__carry_out(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__carry_out( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i1 [[CARRY_OUT]]) |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| ; Extra use of carry_out (the condition under test). |
| call void (...) @llvm.fake.use(i1 %carry_out) |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'carry' must have single use. |
| ; Negative test. %carry feeds %upper_mid and also has an extra llvm.fake.use. |
| ; The expected output keeps the hand-written expansion; no i128 multiply is |
| ; formed. |
| define void @full_mul_int128__mul_use__carry(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__carry( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CARRY]]) |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| ; Extra use of carry (the condition under test). |
| call void (...) @llvm.fake.use(i64 %carry) |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'y_lo_x_lo_hi' must have single use. |
| ; Negative test. %y_lo_x_lo_hi feeds %low_accum and also has an extra |
| ; llvm.fake.use. The expected output keeps the hand-written expansion; no i128 |
| ; multiply is formed. |
| define void @full_mul_int128__mul_use__y_lo_x_lo_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__y_lo_x_lo_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[Y_LO_X_LO_HI]]) |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| ; Extra use of y_lo_x_lo_hi (the condition under test). |
| call void (...) @llvm.fake.use(i64 %y_lo_x_lo_hi) |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'cross_sum_lo' must have single use. |
| ; Negative test. %cross_sum_lo feeds %low_accum and also has an extra |
| ; llvm.fake.use. The expected output keeps the hand-written expansion; no i128 |
| ; multiply is formed. |
| define void @full_mul_int128__mul_use__cross_sum_lo(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__cross_sum_lo( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM_LO]]) |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| ; Extra use of cross_sum_lo (the condition under test). |
| call void (...) @llvm.fake.use(i64 %cross_sum_lo) |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'cross_sum_hi' must have single use. |
| ; Negative test. %cross_sum_hi feeds %upper_mid_with_cross and also has an |
| ; extra llvm.fake.use. The expected output keeps the hand-written expansion; no |
| ; i128 multiply is formed. |
| define void @full_mul_int128__mul_use__cross_sum_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__cross_sum_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[CROSS_SUM_HI]]) |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| ; Extra use of cross_sum_hi (the condition under test). |
| call void (...) @llvm.fake.use(i64 %cross_sum_hi) |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'low_accum' must have exactly 2 uses if doing high multiply. |
| ; Negative test. %low_accum is used by %low_accum_hi and %low_accum_shifted, |
| ; plus one extra llvm.fake.use (three uses in total). The expected output keeps |
| ; the hand-written expansion; no i128 multiply is formed. |
| define void @full_mul_int128__mul_use__low_accum(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__low_accum( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[LOW_ACCUM]]) |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[LW64:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| ; Extra use of low_accum (the condition under test). |
| call void (...) @llvm.fake.use(i64 %low_accum) |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; 'upper_mid' must have single use. |
| ; Negative test. %upper_mid feeds %upper_mid_with_cross and also has an extra |
| ; llvm.fake.use. The expected output keeps the hand-written expansion; no i128 |
| ; multiply is formed. |
| define void @full_mul_int128__mul_use__upper_mid(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__upper_mid( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[UPPER_MID]]) |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[TMP5]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP9:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP9]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split each operand into its low and high 32 bits. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products. |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; Detect unsigned wrap of cross_sum; a wrap is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of cross_sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| ; Middle column: low half of cross_sum plus high half of y_lo * x_lo. |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; Accumulate the high 64 bits of the product. |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| ; Extra use of upper_mid (the condition under test). |
| call void (...) @llvm.fake.use(i64 %upper_mid) |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; Store the high word at byte offset 8 of %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low word: low_accum shifted into the upper 32 bits, combined with the low |
| ; 32 bits of y_lo * x_lo. |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; Store the low word at byte offset 0 of %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; Negative test: 'low_accum_hi' must have single use. |
| ; Here %low_accum_hi has an extra use (the llvm.fake.use call), and the CHECK |
| ; lines expect the expanded 32-bit-limb sequence to be kept as written, with |
| ; no i128 multiply formed for either stored half. |
| define void @full_mul_int128__mul_use__low_accum_hi(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__low_accum_hi( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[LOW_ACCUM_HI]]) |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split x and y into their low and high 32-bit halves. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products (no nuw: this add may wrap). |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; If the cross sum wrapped, the carry is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low partial product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of the cross sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High 64-bit word: y_hi*x_hi + carry + cross_sum_hi + (low_accum >> 32). |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| ; Extra use of %low_accum_hi: this is the condition under test. |
| call void (...) @llvm.fake.use(i64 %low_accum_hi) |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; The high word is stored at byte offset 8 from %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low 64-bit word: (low_accum << 32) | (low 32 bits of y_lo*x_lo). |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; The low word is stored at %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; Negative test: 'upper_mid_with_cross' must have single use. |
| ; Here %upper_mid_with_cross has an extra use (the llvm.fake.use call), and |
| ; the CHECK lines expect the expanded 32-bit-limb sequence to be kept as |
| ; written, with no i128 multiply formed for either stored half. |
| ; NOTE(review): the fake.use operand and its CHECK line were corrected by hand |
| ; to name %upper_mid_with_cross; regenerate the assertions with |
| ; utils/update_test_checks.py to confirm the remaining CHECK lines. |
| define void @full_mul_int128__mul_use__upper_mid_with_cross(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__upper_mid_with_cross( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[X_LO:%.*]] = and i64 [[X]], 4294967295 |
| ; CHECK-NEXT: [[Y_LO:%.*]] = and i64 [[Y]], 4294967295 |
| ; CHECK-NEXT: [[X_HI:%.*]] = lshr i64 [[X]], 32 |
| ; CHECK-NEXT: [[Y_HI:%.*]] = lshr i64 [[Y]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_HI:%.*]] = mul nuw i64 [[Y_LO]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_HI:%.*]] = mul nuw i64 [[Y_HI]], [[X_HI]] |
| ; CHECK-NEXT: [[Y_HI_X_LO:%.*]] = mul nuw i64 [[Y_HI]], [[X_LO]] |
| ; CHECK-NEXT: [[Y_LO_X_LO:%.*]] = mul nuw i64 [[Y_LO]], [[X_LO]] |
| ; CHECK-NEXT: [[CROSS_SUM:%.*]] = add i64 [[Y_HI_X_LO]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY_OUT:%.*]] = icmp ult i64 [[CROSS_SUM]], [[Y_LO_X_HI]] |
| ; CHECK-NEXT: [[CARRY:%.*]] = select i1 [[CARRY_OUT]], i64 4294967296, i64 0 |
| ; CHECK-NEXT: [[Y_LO_X_LO_HI:%.*]] = lshr i64 [[Y_LO_X_LO]], 32 |
| ; CHECK-NEXT: [[CROSS_SUM_LO:%.*]] = and i64 [[CROSS_SUM]], 4294967295 |
| ; CHECK-NEXT: [[CROSS_SUM_HI:%.*]] = lshr i64 [[CROSS_SUM]], 32 |
| ; CHECK-NEXT: [[LOW_ACCUM:%.*]] = add nuw nsw i64 [[CROSS_SUM_LO]], [[Y_LO_X_LO_HI]] |
| ; CHECK-NEXT: [[UPPER_MID:%.*]] = add nuw i64 [[Y_HI_X_HI]], [[CARRY]] |
| ; CHECK-NEXT: [[LOW_ACCUM_HI:%.*]] = lshr i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[UPPER_MID_WITH_CROSS:%.*]] = add i64 [[UPPER_MID]], [[CROSS_SUM_HI]] |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[UPPER_MID_WITH_CROSS]]) |
| ; CHECK-NEXT: [[HW64:%.*]] = add i64 [[UPPER_MID_WITH_CROSS]], [[LOW_ACCUM_HI]] |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[HW64]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = shl i64 [[LOW_ACCUM]], 32 |
| ; CHECK-NEXT: [[Y_LO_X_LO_LO:%.*]] = and i64 [[Y_LO_X_LO]], 4294967295 |
| ; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 [[LOW_ACCUM_SHIFTED]], [[Y_LO_X_LO_LO]] |
| ; CHECK-NEXT: store i64 [[TMP4]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split x and y into their low and high 32-bit halves. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products (no nuw: this add may wrap). |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; If the cross sum wrapped, the carry is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low partial product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of the cross sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High 64-bit word: y_hi*x_hi + carry + cross_sum_hi + (low_accum >> 32). |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| ; Extra use of %upper_mid_with_cross: this is the condition under test. |
| call void (...) @llvm.fake.use(i64 %upper_mid_with_cross) |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; The high word is stored at byte offset 8 from %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low 64-bit word: (low_accum << 32) | (low 32 bits of y_lo*x_lo). |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; The low word is stored at %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |
| ; Positive test: 'low_accum_shifted' can have multiple uses. |
| ; %low_accum_shifted has an extra use (the llvm.fake.use call), yet the CHECK |
| ; lines still expect the high word as zext/mul i128/lshr 64/trunc and the low |
| ; word as a single 'mul i64 x, y'; the extra use is fed by masking that low |
| ; product with -4294967296. |
| define void @full_mul_int128__mul_use__low_accum_shifted(i64 %x, i64 %y, ptr %p) { |
| ; CHECK-LABEL: define void @full_mul_int128__mul_use__low_accum_shifted( |
| ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[P:%.*]]) { |
| ; CHECK-NEXT: [[TMP1:%.*]] = zext i64 [[X]] to i128 |
| ; CHECK-NEXT: [[TMP2:%.*]] = zext i64 [[Y]] to i128 |
| ; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i128 [[TMP1]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = lshr i128 [[TMP3]], 64 |
| ; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw i128 [[TMP4]] to i64 |
| ; CHECK-NEXT: [[HI_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[P]], i64 8 |
| ; CHECK-NEXT: store i64 [[TMP5]], ptr [[HI_PTR]], align 8 |
| ; CHECK-NEXT: [[LW64:%.*]] = mul i64 [[X]], [[Y]] |
| ; CHECK-NEXT: [[LOW_ACCUM_SHIFTED:%.*]] = and i64 [[LW64]], -4294967296 |
| ; CHECK-NEXT: call void (...) @llvm.fake.use(i64 [[LOW_ACCUM_SHIFTED]]) |
| ; CHECK-NEXT: store i64 [[LW64]], ptr [[P]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Split x and y into their low and high 32-bit halves. |
| %x_lo = and i64 %x, 4294967295 |
| %y_lo = and i64 %y, 4294967295 |
| %x_hi = lshr i64 %x, 32 |
| %y_hi = lshr i64 %y, 32 |
| |
| ; The four 32x32 partial products. |
| %y_lo_x_hi = mul nuw i64 %y_lo, %x_hi |
| %y_hi_x_hi = mul nuw i64 %y_hi, %x_hi |
| %y_hi_x_lo = mul nuw i64 %y_hi, %x_lo |
| %y_lo_x_lo = mul nuw i64 %y_lo, %x_lo |
| |
| ; Sum of the two cross products (no nuw: this add may wrap). |
| %cross_sum = add i64 %y_hi_x_lo, %y_lo_x_hi |
| |
| ; If the cross sum wrapped, the carry is worth 1 << 32 in the high word. |
| %carry_out = icmp ult i64 %cross_sum, %y_lo_x_hi |
| %carry = select i1 %carry_out, i64 4294967296, i64 0 |
| |
| ; High 32 bits of the low partial product. |
| %y_lo_x_lo_hi = lshr i64 %y_lo_x_lo, 32 |
| |
| ; Low and high 32 bits of the cross sum. |
| %cross_sum_lo = and i64 %cross_sum, 4294967295 |
| %cross_sum_hi = lshr i64 %cross_sum, 32 |
| |
| %low_accum = add nuw nsw i64 %cross_sum_lo, %y_lo_x_lo_hi |
| |
| ; High 64-bit word: y_hi*x_hi + carry + cross_sum_hi + (low_accum >> 32). |
| %upper_mid = add nuw i64 %y_hi_x_hi, %carry |
| %low_accum_hi = lshr i64 %low_accum, 32 |
| %upper_mid_with_cross = add i64 %upper_mid, %cross_sum_hi |
| %hw64 = add i64 %upper_mid_with_cross, %low_accum_hi |
| |
| ; The high word is stored at byte offset 8 from %p. |
| %hi_ptr = getelementptr inbounds i8, ptr %p, i64 8 |
| store i64 %hw64, ptr %hi_ptr, align 8 |
| |
| ; Low 64-bit word: (low_accum << 32) | (low 32 bits of y_lo*x_lo). |
| %low_accum_shifted = shl i64 %low_accum, 32 |
| ; Extra use of %low_accum_shifted: this is the condition under test. |
| call void (...) @llvm.fake.use(i64 %low_accum_shifted) |
| %y_lo_x_lo_lo = and i64 %y_lo_x_lo, 4294967295 |
| %lw64 = or disjoint i64 %low_accum_shifted, %y_lo_x_lo_lo |
| |
| ; The low word is stored at %p. |
| store i64 %lw64, ptr %p, align 8 |
| |
| ret void |
| } |
| |