| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | 
 | ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,FAST | 
 | ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_100 | FileCheck %s --check-prefixes=CHECK,DEFAULT | 
 | ; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 -fp-contract=fast | %ptxas-verify -arch sm_100 %} | 
 | ; RUN: %if ptxas-12.8 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_100 | %ptxas-verify -arch sm_100 %} | 
 |  | 
 | target triple = "nvptx64-unknown-cuda" | 
 |  | 
 | ;; Baseline fusion test: a plain fmul feeding a plain fadd, with no fast-math |
 | ;; flags on either instruction. With -fp-contract=fast the pair is expected to |
 | ;; fold into a single fma.rn.f32x2; without that option we expect a separate |
 | ;; mul.rn.f32x2 followed by add.rn.f32x2. |
 | define <2 x float> @t0(<2 x float> %a, <2 x float> %b, <2 x float> %c) { |
 | ; FAST-LABEL: t0( |
 | ; FAST:       { |
 | ; FAST-NEXT:    .reg .b64 %rd<5>; |
 | ; FAST-EMPTY: |
 | ; FAST-NEXT:  // %bb.0: |
 | ; FAST-NEXT:    ld.param.b64 %rd1, [t0_param_0]; |
 | ; FAST-NEXT:    ld.param.b64 %rd2, [t0_param_1]; |
 | ; FAST-NEXT:    ld.param.b64 %rd3, [t0_param_2]; |
 | ; FAST-NEXT:    fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; |
 | ; FAST-NEXT:    st.param.b64 [func_retval0], %rd4; |
 | ; FAST-NEXT:    ret; |
 | ; |
 | ; DEFAULT-LABEL: t0( |
 | ; DEFAULT:       { |
 | ; DEFAULT-NEXT:    .reg .b64 %rd<6>; |
 | ; DEFAULT-EMPTY: |
 | ; DEFAULT-NEXT:  // %bb.0: |
 | ; DEFAULT-NEXT:    ld.param.b64 %rd1, [t0_param_0]; |
 | ; DEFAULT-NEXT:    ld.param.b64 %rd2, [t0_param_1]; |
 | ; DEFAULT-NEXT:    mul.rn.f32x2 %rd3, %rd1, %rd2; |
 | ; DEFAULT-NEXT:    ld.param.b64 %rd4, [t0_param_2]; |
 | ; DEFAULT-NEXT:    add.rn.f32x2 %rd5, %rd3, %rd4; |
 | ; DEFAULT-NEXT:    st.param.b64 [func_retval0], %rd5; |
 | ; DEFAULT-NEXT:    ret; |
 |   %v0 = fmul <2 x float> %a, %b |
 |   %v1 = fadd <2 x float> %v0, %c |
 |   ret <2 x float> %v1 |
 | } |
 |  | 
 | ;; No fma can be formed here: the fmul consumes the fadd/fsub results instead |
 | ;; of feeding one. Without -fp-contract=fast, make sure every operation is |
 | ;; emitted with an explicit rounding mode (add.rn.f32x2, sub.rn.f32x2, |
 | ;; mul.rn.f32x2) to prevent ptxas from fusing it with anything else. With |
 | ;; -fp-contract=fast the forms without ".rn" are expected instead. |
 | define <2 x float> @t1(<2 x float> %a, <2 x float> %b) { |
 | ; FAST-LABEL: t1( |
 | ; FAST:       { |
 | ; FAST-NEXT:    .reg .b64 %rd<6>; |
 | ; FAST-EMPTY: |
 | ; FAST-NEXT:  // %bb.0: |
 | ; FAST-NEXT:    ld.param.b64 %rd1, [t1_param_0]; |
 | ; FAST-NEXT:    ld.param.b64 %rd2, [t1_param_1]; |
 | ; FAST-NEXT:    add.f32x2 %rd3, %rd1, %rd2; |
 | ; FAST-NEXT:    sub.f32x2 %rd4, %rd1, %rd2; |
 | ; FAST-NEXT:    mul.f32x2 %rd5, %rd3, %rd4; |
 | ; FAST-NEXT:    st.param.b64 [func_retval0], %rd5; |
 | ; FAST-NEXT:    ret; |
 | ; |
 | ; DEFAULT-LABEL: t1( |
 | ; DEFAULT:       { |
 | ; DEFAULT-NEXT:    .reg .b64 %rd<6>; |
 | ; DEFAULT-EMPTY: |
 | ; DEFAULT-NEXT:  // %bb.0: |
 | ; DEFAULT-NEXT:    ld.param.b64 %rd1, [t1_param_0]; |
 | ; DEFAULT-NEXT:    ld.param.b64 %rd2, [t1_param_1]; |
 | ; DEFAULT-NEXT:    add.rn.f32x2 %rd3, %rd1, %rd2; |
 | ; DEFAULT-NEXT:    sub.rn.f32x2 %rd4, %rd1, %rd2; |
 | ; DEFAULT-NEXT:    mul.rn.f32x2 %rd5, %rd3, %rd4; |
 | ; DEFAULT-NEXT:    st.param.b64 [func_retval0], %rd5; |
 | ; DEFAULT-NEXT:    ret; |
 |   %sum = fadd <2 x float> %a, %b |
 |   %diff = fsub <2 x float> %a, %b |
 |   %prod = fmul <2 x float> %sum, %diff |
 |   ret <2 x float> %prod |
 | } |
 |  | 
 | ;; When every instruction carries the "contract" fast-math flag, the variants |
 | ;; without ".rn" must be emitted in both run configurations, i.e. regardless |
 | ;; of whether -fp-contract=fast is passed to llc. |
 | define <2 x float> @t2(<2 x float> %a, <2 x float> %b) { |
 | ; CHECK-LABEL: t2( |
 | ; CHECK:       { |
 | ; CHECK-NEXT:    .reg .b64 %rd<6>; |
 | ; CHECK-EMPTY: |
 | ; CHECK-NEXT:  // %bb.0: |
 | ; CHECK-NEXT:    ld.param.b64 %rd1, [t2_param_0]; |
 | ; CHECK-NEXT:    ld.param.b64 %rd2, [t2_param_1]; |
 | ; CHECK-NEXT:    add.f32x2 %rd3, %rd1, %rd2; |
 | ; CHECK-NEXT:    sub.f32x2 %rd4, %rd1, %rd2; |
 | ; CHECK-NEXT:    mul.f32x2 %rd5, %rd3, %rd4; |
 | ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd5; |
 | ; CHECK-NEXT:    ret; |
 |   %sum = fadd contract <2 x float> %a, %b |
 |   %diff = fsub contract <2 x float> %a, %b |
 |   %prod = fmul contract <2 x float> %sum, %diff |
 |   ret <2 x float> %prod |
 | } |
 |  | 
 | ;; With the "contract" flag on both the fmul and the fadd that consumes it, |
 | ;; the pair must fold into fma.rn.f32x2 in both run configurations, not only |
 | ;; when -fp-contract=fast is given. |
 | define <2 x float> @t3(<2 x float> %a, <2 x float> %b, <2 x float> %c) { |
 | ; CHECK-LABEL: t3( |
 | ; CHECK:       { |
 | ; CHECK-NEXT:    .reg .b64 %rd<5>; |
 | ; CHECK-EMPTY: |
 | ; CHECK-NEXT:  // %bb.0: |
 | ; CHECK-NEXT:    ld.param.b64 %rd1, [t3_param_0]; |
 | ; CHECK-NEXT:    ld.param.b64 %rd2, [t3_param_1]; |
 | ; CHECK-NEXT:    ld.param.b64 %rd3, [t3_param_2]; |
 | ; CHECK-NEXT:    fma.rn.f32x2 %rd4, %rd1, %rd2, %rd3; |
 | ; CHECK-NEXT:    st.param.b64 [func_retval0], %rd4; |
 | ; CHECK-NEXT:    ret; |
 |   %prod = fmul contract <2 x float> %a, %b |
 |   %sum = fadd contract <2 x float> %prod, %c |
 |   ret <2 x float> %sum |
 | } |