; llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sqrt.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s

 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"

 declare float @_Z4sqrtf(float)
 declare <2 x float> @_Z4sqrtDv2_f(<2 x float>)
 declare <3 x float> @_Z4sqrtDv3_f(<3 x float>)
 declare <4 x float> @_Z4sqrtDv4_f(<4 x float>)
 declare <8 x float> @_Z4sqrtDv8_f(<8 x float>)
 declare <16 x float> @_Z4sqrtDv16_f(<16 x float>)

 declare double @_Z4sqrtd(double)
 declare <2 x double> @_Z4sqrtDv2_d(<2 x double>)
 declare <3 x double> @_Z4sqrtDv3_d(<3 x double>)
 declare <4 x double> @_Z4sqrtDv4_d(<4 x double>)
 declare <8 x double> @_Z4sqrtDv8_d(<8 x double>)
 declare <16 x double> @_Z4sqrtDv16_d(<16 x double>)

 declare half @_Z4sqrtDh(half)
 declare <2 x half> @_Z4sqrtDv2_Dh(<2 x half>)
 declare <3 x half> @_Z4sqrtDv3_Dh(<3 x half>)
 declare <4 x half> @_Z4sqrtDv4_Dh(<4 x half>)
 declare <8 x half> @_Z4sqrtDv8_Dh(<8 x half>)
 declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)

 ; float sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.f32, keeping the tag.
 define float @test_sqrt_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0
   ret float %sqrt
 }

 ; <2 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v2f32, keeping the tag.
 define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
   ret <2 x float> %sqrt
 }

 ; <3 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v3f32, keeping the tag.
 define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
   ret <3 x float> %sqrt
 }

 ; <4 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v4f32, keeping the tag.
 define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
   ret <4 x float> %sqrt
 }

 ; <8 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v8f32, keeping the tag.
 define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
   ret <8 x float> %sqrt
 }

 ; <16 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v16f32, keeping the tag.
 define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
   ret <16 x float> %sqrt
 }

 ; float sqrt builtin call without !fpmath ("cr" presumably means correctly rounded -- confirm):
 ; still expected to become llvm.sqrt.f32.
 define float @test_sqrt_cr_f32(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg)
   ret float %sqrt
 }

 ; <2 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v2f32.
 define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
   ret <2 x float> %sqrt
 }

 ; <3 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v3f32.
 define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
 ; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
 ; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]])
 ; CHECK-NEXT:    ret <3 x float> [[SQRT]]
 ;
   %sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
   ret <3 x float> %sqrt
 }

 ; <4 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v4f32.
 define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
 ; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
 ; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
 ; CHECK-NEXT:    ret <4 x float> [[SQRT]]
 ;
   %sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
   ret <4 x float> %sqrt
 }

 ; <8 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v8f32.
 define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
 ; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
 ; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]])
 ; CHECK-NEXT:    ret <8 x float> [[SQRT]]
 ;
   %sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
   ret <8 x float> %sqrt
 }

 ; <16 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v16f32.
 define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
 ; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
 ; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]])
 ; CHECK-NEXT:    ret <16 x float> [[SQRT]]
 ;
   %sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
   ret <16 x float> %sqrt
 }

 ; double sqrt builtin call: expected to become llvm.sqrt.f64.
 define double @test_sqrt_f64(double %arg) {
 ; CHECK-LABEL: define double @test_sqrt_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[ARG]])
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @_Z4sqrtd(double %arg)
   ret double %sqrt
 }

 ; <2 x double> sqrt builtin call: expected to become llvm.sqrt.v2f64.
 define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
 ; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
 ; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[ARG]])
 ; CHECK-NEXT:    ret <2 x double> [[SQRT]]
 ;
   %sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
   ret <2 x double> %sqrt
 }

 ; <3 x double> sqrt builtin call: expected to become llvm.sqrt.v3f64.
 define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
 ; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
 ; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x double> @llvm.sqrt.v3f64(<3 x double> [[ARG]])
 ; CHECK-NEXT:    ret <3 x double> [[SQRT]]
 ;
   %sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
   ret <3 x double> %sqrt
 }

 ; <4 x double> sqrt builtin call: expected to become llvm.sqrt.v4f64.
 define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
 ; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
 ; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[ARG]])
 ; CHECK-NEXT:    ret <4 x double> [[SQRT]]
 ;
   %sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
   ret <4 x double> %sqrt
 }

 ; <8 x double> sqrt builtin call: expected to become llvm.sqrt.v8f64.
 define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
 ; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
 ; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[ARG]])
 ; CHECK-NEXT:    ret <8 x double> [[SQRT]]
 ;
   %sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
   ret <8 x double> %sqrt
 }

 ; <16 x double> sqrt builtin call: expected to become llvm.sqrt.v16f64.
 define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
 ; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
 ; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x double> @llvm.sqrt.v16f64(<16 x double> [[ARG]])
 ; CHECK-NEXT:    ret <16 x double> [[SQRT]]
 ;
   %sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
   ret <16 x double> %sqrt
 }

 ; half sqrt builtin call: expected to become llvm.sqrt.f16.
 define half @test_sqrt_f16(half %arg) {
 ; CHECK-LABEL: define half @test_sqrt_f16
 ; CHECK-SAME: (half [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call half @llvm.sqrt.f16(half [[ARG]])
 ; CHECK-NEXT:    ret half [[SQRT]]
 ;
   %sqrt = tail call half @_Z4sqrtDh(half %arg)
   ret half %sqrt
 }

 ; <2 x half> sqrt builtin call: expected to become llvm.sqrt.v2f16.
 define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
 ; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
 ; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x half> @llvm.sqrt.v2f16(<2 x half> [[ARG]])
 ; CHECK-NEXT:    ret <2 x half> [[SQRT]]
 ;
   %sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
   ret <2 x half> %sqrt
 }

 ; <3 x half> sqrt builtin call: expected to become llvm.sqrt.v3f16.
 define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
 ; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
 ; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <3 x half> @llvm.sqrt.v3f16(<3 x half> [[ARG]])
 ; CHECK-NEXT:    ret <3 x half> [[SQRT]]
 ;
   %sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
   ret <3 x half> %sqrt
 }

 ; <4 x half> sqrt builtin call: expected to become llvm.sqrt.v4f16.
 define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
 ; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
 ; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[ARG]])
 ; CHECK-NEXT:    ret <4 x half> [[SQRT]]
 ;
   %sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
   ret <4 x half> %sqrt
 }

 ; <8 x half> sqrt builtin call: expected to become llvm.sqrt.v8f16.
 define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
 ; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
 ; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[ARG]])
 ; CHECK-NEXT:    ret <8 x half> [[SQRT]]
 ;
   %sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
   ret <8 x half> %sqrt
 }

 ; <16 x half> sqrt builtin call: expected to become llvm.sqrt.v16f16.
 define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
 ; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
 ; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <16 x half> @llvm.sqrt.v16f16(<16 x half> [[ARG]])
 ; CHECK-NEXT:    ret <16 x half> [[SQRT]]
 ;
   %sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
   ret <16 x half> %sqrt
 }

 ; Call site carries attribute group #0 (nobuiltin): the _Z4sqrtf call is expected to stay, !fpmath included.
 define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
   ret float %sqrt
 }

 ; Call site carries attribute group #0 (nobuiltin): the _Z4sqrtDv2_f call is expected to stay, !fpmath included.
 define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
   ret <2 x float> %sqrt
 }

 ; Call site carries attribute group #0 (nobuiltin) and no !fpmath: the _Z4sqrtf call is expected to stay.
 define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
   ret float %sqrt
 }

 ; Call site carries attribute group #0 (nobuiltin) and no !fpmath: the _Z4sqrtDv2_f call is expected to stay.
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
   ret <2 x float> %sqrt
 }

 ; "no-builtins" should be ignored
 ; Function carries #1 ("no-builtins"); the call site also carries #0 (nobuiltin) and the expected
 ; output keeps the _Z4sqrtf call. NOTE(review): as written this does not separate the effect of
 ; the two attributes -- confirm whether the call site was meant to drop #0.
 define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
   ret float %sqrt
 }

 ; Function carries #1 ("no-builtins"); the call site also carries #0 (nobuiltin) and the expected
 ; output keeps the _Z4sqrtDv2_f call. NOTE(review): as written this does not separate the effect
 ; of the two attributes -- confirm whether the call site was meant to drop #0.
 define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
   ret <2 x float> %sqrt
 }

 ; Function carries #1 ("no-builtins"), call site carries #0 (nobuiltin), no !fpmath: the expected
 ; output keeps the _Z4sqrtf call. NOTE(review): the call-site attribute alone may explain this --
 ; confirm whether the call site was meant to drop #0.
 define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
 ; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @_Z4sqrtf(float %arg) #0
   ret float %sqrt
 }

 ; Function carries #1 ("no-builtins"), call site carries #0 (nobuiltin), no !fpmath: the expected
 ; output keeps the _Z4sqrtDv2_f call. NOTE(review): the call-site attribute alone may explain
 ; this -- confirm whether the call site was meant to drop #0.
 define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
   ret <2 x float> %sqrt
 }

 ; The nnan ninf flags on the builtin call are expected to carry over to llvm.sqrt.f32 together with !fpmath.
 define float @test_sqrt_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0
   ret float %sqrt
 }

 ; The contract/nsz/nnan flags are expected to carry over to llvm.sqrt.v2f32 (listed in a different
 ; order in the expected output) together with !fpmath.
 define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
   ret <2 x float> %sqrt
 }

 ; Besides the nnan ninf flags and !fpmath, the unrelated !foo metadata is expected to carry over
 ; to the llvm.sqrt.f32 call.
 define float @test_sqrt_f32_preserve_flags_md(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1
   ret float %sqrt
 }

 ; Besides the contract/nsz/nnan flags and !fpmath, the unrelated !foo metadata is expected to
 ; carry over to the llvm.sqrt.v2f32 call.
 define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
   ret <2 x float> %sqrt
 }

 ; No !fpmath here; the ninf contract flags are expected to carry over to the llvm.sqrt.f32 call.
 define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
 ; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call ninf contract float @llvm.sqrt.f32(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call ninf contract float @_Z4sqrtf(float %arg)
   ret float %sqrt
 }

 ; No !fpmath here; the nnan nsz flags are expected to carry over to the llvm.sqrt.v2f32 call.
 define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
 ; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags
 ; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call nnan nsz <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
 ; CHECK-NEXT:    ret <2 x float> [[SQRT]]
 ;
   %sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
   ret <2 x float> %sqrt
 }

 ; Test the libm name, not a recognized opencl builtin.
 declare float @sqrtf(float) #2
 declare double @sqrt(double) #2

 ; Call to the unmangled libm name sqrtf: expected to be left untouched.
 define float @test_libm_sqrt_f32(float %arg) {
 ; CHECK-LABEL: define float @test_libm_sqrt_f32
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]])
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @sqrtf(float %arg)
   ret float %sqrt
 }

 ; Call to the unmangled libm name sqrtf tagged with !fpmath: expected to be left untouched, tag included.
 define float @test_libm_sqrt_f32_fpmath(float %arg) {
 ; CHECK-LABEL: define float @test_libm_sqrt_f32_fpmath
 ; CHECK-SAME: (float [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret float [[SQRT]]
 ;
   %sqrt = tail call float @sqrtf(float %arg), !fpmath !0
   ret float %sqrt
 }

 ; Call to the unmangled libm name sqrt (double): expected to be left untouched.
 define double @test_libm_sqrt_f64(double %arg) {
 ; CHECK-LABEL: define double @test_libm_sqrt_f64
 ; CHECK-SAME: (double [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]])
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @sqrt(double %arg)
   ret double %sqrt
 }

 ; Call to the unmangled libm name sqrt (double) tagged with !fpmath: expected to be left untouched, tag included.
 define double @test_libm_sqrt_f64_fpmath(double %arg) {
 ; CHECK-LABEL: define double @test_libm_sqrt_f64_fpmath
 ; CHECK-SAME: (double [[ARG:%.*]]) {
 ; CHECK-NEXT:    [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]), !fpmath [[META0]]
 ; CHECK-NEXT:    ret double [[SQRT]]
 ;
   %sqrt = tail call double @sqrt(double %arg), !fpmath !0
   ret double %sqrt
 }

 ; Attribute groups: #0 is applied at call sites, #1 on function definitions, #2 on the libm declarations.
 attributes #0 = { nobuiltin }
 attributes #1 = { "no-builtins" }
 attributes #2 = { nounwind memory(none) }

 ; Metadata nodes: !0 is the operand of every !fpmath attachment in this file, !1 the operand of !foo.
 !0 = !{float 3.000000e+00}
 !1 = !{i32 1234}
	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
	; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s

	target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"

	declare float @_Z4sqrtf(float)
	declare <2 x float> @_Z4sqrtDv2_f(<2 x float>)
	declare <3 x float> @_Z4sqrtDv3_f(<3 x float>)
	declare <4 x float> @_Z4sqrtDv4_f(<4 x float>)
	declare <8 x float> @_Z4sqrtDv8_f(<8 x float>)
	declare <16 x float> @_Z4sqrtDv16_f(<16 x float>)

	declare double @_Z4sqrtd(double)
	declare <2 x double> @_Z4sqrtDv2_d(<2 x double>)
	declare <3 x double> @_Z4sqrtDv3_d(<3 x double>)
	declare <4 x double> @_Z4sqrtDv4_d(<4 x double>)
	declare <8 x double> @_Z4sqrtDv8_d(<8 x double>)
	declare <16 x double> @_Z4sqrtDv16_d(<16 x double>)

	declare half @_Z4sqrtDh(half)
	declare <2 x half> @_Z4sqrtDv2_Dh(<2 x half>)
	declare <3 x half> @_Z4sqrtDv3_Dh(<3 x half>)
	declare <4 x half> @_Z4sqrtDv4_Dh(<4 x half>)
	declare <8 x half> @_Z4sqrtDv8_Dh(<8 x half>)
	declare <16 x half> @_Z4sqrtDv16_Dh(<16 x half>)

	; float sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.f32, keeping the tag.
	define float @test_sqrt_f32(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_f32
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0:![0-9]+]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg), !fpmath !0
	ret float %sqrt
	}

	; <2 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v2f32, keeping the tag.
	define <2 x float> @test_sqrt_v2f32(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
	ret <2 x float> %sqrt
	}

	; <3 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v3f32, keeping the tag.
	define <3 x float> @test_sqrt_v3f32(<3 x float> %arg) {
	; CHECK-LABEL: define <3 x float> @test_sqrt_v3f32
	; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <3 x float> [[SQRT]]
	;
	%sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg), !fpmath !0
	ret <3 x float> %sqrt
	}

	; <4 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v4f32, keeping the tag.
	define <4 x float> @test_sqrt_v4f32(<4 x float> %arg) {
	; CHECK-LABEL: define <4 x float> @test_sqrt_v4f32
	; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <4 x float> [[SQRT]]
	;
	%sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg), !fpmath !0
	ret <4 x float> %sqrt
	}

	; <8 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v8f32, keeping the tag.
	define <8 x float> @test_sqrt_v8f32(<8 x float> %arg) {
	; CHECK-LABEL: define <8 x float> @test_sqrt_v8f32
	; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <8 x float> [[SQRT]]
	;
	%sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg), !fpmath !0
	ret <8 x float> %sqrt
	}

	; <16 x float> sqrt builtin call tagged with !fpmath: expected to become llvm.sqrt.v16f32, keeping the tag.
	define <16 x float> @test_sqrt_v16f32(<16 x float> %arg) {
	; CHECK-LABEL: define <16 x float> @test_sqrt_v16f32
	; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <16 x float> [[SQRT]]
	;
	%sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg), !fpmath !0
	ret <16 x float> %sqrt
	}

	; float sqrt builtin call without !fpmath ("cr" presumably means correctly rounded -- confirm):
	; still expected to become llvm.sqrt.f32.
	define float @test_sqrt_cr_f32(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_cr_f32
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @llvm.sqrt.f32(float [[ARG]])
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg)
	ret float %sqrt
	}

	; <2 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v2f32.
	define <2 x float> @test_sqrt_cr_v2f32(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
	ret <2 x float> %sqrt
	}

	; <3 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v3f32.
	define <3 x float> @test_sqrt_cr_v3f32(<3 x float> %arg) {
	; CHECK-LABEL: define <3 x float> @test_sqrt_cr_v3f32
	; CHECK-SAME: (<3 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x float> @llvm.sqrt.v3f32(<3 x float> [[ARG]])
	; CHECK-NEXT: ret <3 x float> [[SQRT]]
	;
	%sqrt = tail call <3 x float> @_Z4sqrtDv3_f(<3 x float> %arg)
	ret <3 x float> %sqrt
	}

	; <4 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v4f32.
	define <4 x float> @test_sqrt_cr_v4f32(<4 x float> %arg) {
	; CHECK-LABEL: define <4 x float> @test_sqrt_cr_v4f32
	; CHECK-SAME: (<4 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]])
	; CHECK-NEXT: ret <4 x float> [[SQRT]]
	;
	%sqrt = tail call <4 x float> @_Z4sqrtDv4_f(<4 x float> %arg)
	ret <4 x float> %sqrt
	}

	; <8 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v8f32.
	define <8 x float> @test_sqrt_cr_v8f32(<8 x float> %arg) {
	; CHECK-LABEL: define <8 x float> @test_sqrt_cr_v8f32
	; CHECK-SAME: (<8 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[ARG]])
	; CHECK-NEXT: ret <8 x float> [[SQRT]]
	;
	%sqrt = tail call <8 x float> @_Z4sqrtDv8_f(<8 x float> %arg)
	ret <8 x float> %sqrt
	}

	; <16 x float> sqrt builtin call without !fpmath: still expected to become llvm.sqrt.v16f32.
	define <16 x float> @test_sqrt_cr_v16f32(<16 x float> %arg) {
	; CHECK-LABEL: define <16 x float> @test_sqrt_cr_v16f32
	; CHECK-SAME: (<16 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[ARG]])
	; CHECK-NEXT: ret <16 x float> [[SQRT]]
	;
	%sqrt = tail call <16 x float> @_Z4sqrtDv16_f(<16 x float> %arg)
	ret <16 x float> %sqrt
	}

	; double sqrt builtin call: expected to become llvm.sqrt.f64.
	define double @test_sqrt_f64(double %arg) {
	; CHECK-LABEL: define double @test_sqrt_f64
	; CHECK-SAME: (double [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call double @llvm.sqrt.f64(double [[ARG]])
	; CHECK-NEXT: ret double [[SQRT]]
	;
	%sqrt = tail call double @_Z4sqrtd(double %arg)
	ret double %sqrt
	}

	; <2 x double> sqrt builtin call: expected to become llvm.sqrt.v2f64.
	define <2 x double> @test_sqrt_v2f64(<2 x double> %arg) {
	; CHECK-LABEL: define <2 x double> @test_sqrt_v2f64
	; CHECK-SAME: (<2 x double> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[ARG]])
	; CHECK-NEXT: ret <2 x double> [[SQRT]]
	;
	%sqrt = tail call <2 x double> @_Z4sqrtDv2_d(<2 x double> %arg)
	ret <2 x double> %sqrt
	}

	; <3 x double> sqrt builtin call: expected to become llvm.sqrt.v3f64.
	define <3 x double> @test_sqrt_v3f64(<3 x double> %arg) {
	; CHECK-LABEL: define <3 x double> @test_sqrt_v3f64
	; CHECK-SAME: (<3 x double> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x double> @llvm.sqrt.v3f64(<3 x double> [[ARG]])
	; CHECK-NEXT: ret <3 x double> [[SQRT]]
	;
	%sqrt = tail call <3 x double> @_Z4sqrtDv3_d(<3 x double> %arg)
	ret <3 x double> %sqrt
	}

	; <4 x double> sqrt builtin call: expected to become llvm.sqrt.v4f64.
	define <4 x double> @test_sqrt_v4f64(<4 x double> %arg) {
	; CHECK-LABEL: define <4 x double> @test_sqrt_v4f64
	; CHECK-SAME: (<4 x double> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[ARG]])
	; CHECK-NEXT: ret <4 x double> [[SQRT]]
	;
	%sqrt = tail call <4 x double> @_Z4sqrtDv4_d(<4 x double> %arg)
	ret <4 x double> %sqrt
	}

	; <8 x double> sqrt builtin call: expected to become llvm.sqrt.v8f64.
	define <8 x double> @test_sqrt_v8f64(<8 x double> %arg) {
	; CHECK-LABEL: define <8 x double> @test_sqrt_v8f64
	; CHECK-SAME: (<8 x double> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[ARG]])
	; CHECK-NEXT: ret <8 x double> [[SQRT]]
	;
	%sqrt = tail call <8 x double> @_Z4sqrtDv8_d(<8 x double> %arg)
	ret <8 x double> %sqrt
	}

	; <16 x double> sqrt builtin call: expected to become llvm.sqrt.v16f64.
	define <16 x double> @test_sqrt_v16f64(<16 x double> %arg) {
	; CHECK-LABEL: define <16 x double> @test_sqrt_v16f64
	; CHECK-SAME: (<16 x double> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x double> @llvm.sqrt.v16f64(<16 x double> [[ARG]])
	; CHECK-NEXT: ret <16 x double> [[SQRT]]
	;
	%sqrt = tail call <16 x double> @_Z4sqrtDv16_d(<16 x double> %arg)
	ret <16 x double> %sqrt
	}

	; half sqrt builtin call: expected to become llvm.sqrt.f16.
	define half @test_sqrt_f16(half %arg) {
	; CHECK-LABEL: define half @test_sqrt_f16
	; CHECK-SAME: (half [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call half @llvm.sqrt.f16(half [[ARG]])
	; CHECK-NEXT: ret half [[SQRT]]
	;
	%sqrt = tail call half @_Z4sqrtDh(half %arg)
	ret half %sqrt
	}

	; <2 x half> sqrt builtin call: expected to become llvm.sqrt.v2f16.
	define <2 x half> @test_sqrt_v2f16(<2 x half> %arg) {
	; CHECK-LABEL: define <2 x half> @test_sqrt_v2f16
	; CHECK-SAME: (<2 x half> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x half> @llvm.sqrt.v2f16(<2 x half> [[ARG]])
	; CHECK-NEXT: ret <2 x half> [[SQRT]]
	;
	%sqrt = tail call <2 x half> @_Z4sqrtDv2_Dh(<2 x half> %arg)
	ret <2 x half> %sqrt
	}

	; <3 x half> sqrt builtin call: expected to become llvm.sqrt.v3f16.
	define <3 x half> @test_sqrt_v3f16(<3 x half> %arg) {
	; CHECK-LABEL: define <3 x half> @test_sqrt_v3f16
	; CHECK-SAME: (<3 x half> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <3 x half> @llvm.sqrt.v3f16(<3 x half> [[ARG]])
	; CHECK-NEXT: ret <3 x half> [[SQRT]]
	;
	%sqrt = tail call <3 x half> @_Z4sqrtDv3_Dh(<3 x half> %arg)
	ret <3 x half> %sqrt
	}

	; <4 x half> sqrt builtin call: expected to become llvm.sqrt.v4f16.
	define <4 x half> @test_sqrt_v4f16(<4 x half> %arg) {
	; CHECK-LABEL: define <4 x half> @test_sqrt_v4f16
	; CHECK-SAME: (<4 x half> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <4 x half> @llvm.sqrt.v4f16(<4 x half> [[ARG]])
	; CHECK-NEXT: ret <4 x half> [[SQRT]]
	;
	%sqrt = tail call <4 x half> @_Z4sqrtDv4_Dh(<4 x half> %arg)
	ret <4 x half> %sqrt
	}

	; <8 x half> sqrt builtin call: expected to become llvm.sqrt.v8f16.
	define <8 x half> @test_sqrt_v8f16(<8 x half> %arg) {
	; CHECK-LABEL: define <8 x half> @test_sqrt_v8f16
	; CHECK-SAME: (<8 x half> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <8 x half> @llvm.sqrt.v8f16(<8 x half> [[ARG]])
	; CHECK-NEXT: ret <8 x half> [[SQRT]]
	;
	%sqrt = tail call <8 x half> @_Z4sqrtDv8_Dh(<8 x half> %arg)
	ret <8 x half> %sqrt
	}

	; <16 x half> sqrt builtin call: expected to become llvm.sqrt.v16f16.
	define <16 x half> @test_sqrt_v16f16(<16 x half> %arg) {
	; CHECK-LABEL: define <16 x half> @test_sqrt_v16f16
	; CHECK-SAME: (<16 x half> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <16 x half> @llvm.sqrt.v16f16(<16 x half> [[ARG]])
	; CHECK-NEXT: ret <16 x half> [[SQRT]]
	;
	%sqrt = tail call <16 x half> @_Z4sqrtDv16_Dh(<16 x half> %arg)
	ret <16 x half> %sqrt
	}

	; Call site carries attribute group #0 (nobuiltin): the _Z4sqrtf call is expected to stay, !fpmath included.
	define float @test_sqrt_f32_nobuiltin_callsite(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_f32_nobuiltin_callsite
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3:[0-9]+]], !fpmath [[META0]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
	ret float %sqrt
	}

	; Call site carries attribute group #0 (nobuiltin): the _Z4sqrtDv2_f call is expected to stay, !fpmath included.
	define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltin_callsite
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
	ret <2 x float> %sqrt
	}

	; Call site carries attribute group #0 (nobuiltin) and no !fpmath: the _Z4sqrtf call is expected to stay.
	define float @test_sqrt_cr_f32_nobuiltin_callsite(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltin_callsite
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg) #0
	ret float %sqrt
	}

	; Call site carries attribute group #0 (nobuiltin) and no !fpmath: the _Z4sqrtDv2_f call is expected to stay.
	define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltin_callsite
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
	ret <2 x float> %sqrt
	}

	; "no-builtins" should be ignored
	; Function carries #1 ("no-builtins"); the call site also carries #0 (nobuiltin) and the expected
	; output keeps the _Z4sqrtf call. NOTE(review): as written this does not separate the effect of
	; the two attributes -- confirm whether the call site was meant to drop #0.
	define float @test_sqrt_f32_nobuiltins(float %arg) #1 {
	; CHECK-LABEL: define float @test_sqrt_f32_nobuiltins
	; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg) #0, !fpmath !0
	ret float %sqrt
	}

	; Function carries #1 ("no-builtins"); the call site also carries #0 (nobuiltin) and the expected
	; output keeps the _Z4sqrtDv2_f call. NOTE(review): as written this does not separate the effect
	; of the two attributes -- confirm whether the call site was meant to drop #0.
	define <2 x float> @test_sqrt_v2f32_nobuiltins(<2 x float> %arg) #1 {
	; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_nobuiltins
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]], !fpmath [[META0]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0, !fpmath !0
	ret <2 x float> %sqrt
	}

	; Function carries #1 ("no-builtins"), call site carries #0 (nobuiltin), no !fpmath: the expected
	; output keeps the _Z4sqrtf call. NOTE(review): the call-site attribute alone may explain this --
	; confirm whether the call site was meant to drop #0.
	define float @test_sqrt_cr_f32_nobuiltins(float %arg) #1 {
	; CHECK-LABEL: define float @test_sqrt_cr_f32_nobuiltins
	; CHECK-SAME: (float [[ARG:%.*]]) #[[ATTR0]] {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @_Z4sqrtf(float [[ARG]]) #[[ATTR3]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @_Z4sqrtf(float %arg) #0
	ret float %sqrt
	}

	; Function carries #1 ("no-builtins"), call site carries #0 (nobuiltin), no !fpmath: the expected
	; output keeps the _Z4sqrtDv2_f call. NOTE(review): the call-site attribute alone may explain
	; this -- confirm whether the call site was meant to drop #0.
	define <2 x float> @test_sqrt_cr_v2f32_nobuiltins(<2 x float> %arg) #1 {
	; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_nobuiltins
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) #[[ATTR0]] {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> [[ARG]]) #[[ATTR3]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg) #0
	ret <2 x float> %sqrt
	}

	; The nnan ninf flags on the builtin call are expected to carry over to llvm.sqrt.f32 together with !fpmath.
	define float @test_sqrt_f32_preserve_flags(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0
	ret float %sqrt
	}

	; The contract/nsz/nnan flags are expected to carry over to llvm.sqrt.v2f32 (listed in a different
	; order in the expected output) together with !fpmath.
	define <2 x float> @test_sqrt_v2f32_preserve_flags(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0
	ret <2 x float> %sqrt
	}

	; Besides the nnan ninf flags and !fpmath, the unrelated !foo metadata is expected to carry over
	; to the llvm.sqrt.f32 call.
	define float @test_sqrt_f32_preserve_flags_md(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_f32_preserve_flags_md
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan ninf float @llvm.sqrt.f32(float [[ARG]]), !fpmath [[META0]], !foo [[META1:![0-9]+]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call nnan ninf float @_Z4sqrtf(float %arg), !fpmath !0, !foo !1
	ret float %sqrt
	}

	; Besides the contract/nsz/nnan flags and !fpmath, the unrelated !foo metadata is expected to
	; carry over to the llvm.sqrt.v2f32 call.
	define <2 x float> @test_sqrt_v2f32_preserve_flags_md(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_v2f32_preserve_flags_md
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]]), !fpmath [[META0]], !foo [[META1]]
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call contract nsz nnan <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg), !fpmath !0, !foo !1
	ret <2 x float> %sqrt
	}

	; No !fpmath here; the ninf contract flags are expected to carry over to the llvm.sqrt.f32 call.
	define float @test_sqrt_cr_f32_preserve_flags(float %arg) {
	; CHECK-LABEL: define float @test_sqrt_cr_f32_preserve_flags
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call ninf contract float @llvm.sqrt.f32(float [[ARG]])
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call ninf contract float @_Z4sqrtf(float %arg)
	ret float %sqrt
	}

	; No !fpmath here; the nnan nsz flags are expected to carry over to the llvm.sqrt.v2f32 call.
	define <2 x float> @test_sqrt_cr_v2f32_preserve_flags(<2 x float> %arg) {
	; CHECK-LABEL: define <2 x float> @test_sqrt_cr_v2f32_preserve_flags
	; CHECK-SAME: (<2 x float> [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call nnan nsz <2 x float> @llvm.sqrt.v2f32(<2 x float> [[ARG]])
	; CHECK-NEXT: ret <2 x float> [[SQRT]]
	;
	%sqrt = tail call nnan nsz <2 x float> @_Z4sqrtDv2_f(<2 x float> %arg)
	ret <2 x float> %sqrt
	}

	; Test the libm name, not a recognized opencl builtin.
	declare float @sqrtf(float) #2
	declare double @sqrt(double) #2

	; Call to the unmangled libm name sqrtf: expected to be left untouched.
	define float @test_libm_sqrt_f32(float %arg) {
	; CHECK-LABEL: define float @test_libm_sqrt_f32
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]])
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @sqrtf(float %arg)
	ret float %sqrt
	}

	; Call to the unmangled libm name sqrtf tagged with !fpmath: expected to be left untouched, tag included.
	define float @test_libm_sqrt_f32_fpmath(float %arg) {
	; CHECK-LABEL: define float @test_libm_sqrt_f32_fpmath
	; CHECK-SAME: (float [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call float @sqrtf(float [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret float [[SQRT]]
	;
	%sqrt = tail call float @sqrtf(float %arg), !fpmath !0
	ret float %sqrt
	}

	; Call to the unmangled libm name sqrt (double): expected to be left untouched.
	define double @test_libm_sqrt_f64(double %arg) {
	; CHECK-LABEL: define double @test_libm_sqrt_f64
	; CHECK-SAME: (double [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]])
	; CHECK-NEXT: ret double [[SQRT]]
	;
	%sqrt = tail call double @sqrt(double %arg)
	ret double %sqrt
	}

	; Call to the unmangled libm name sqrt (double) tagged with !fpmath: expected to be left untouched, tag included.
	define double @test_libm_sqrt_f64_fpmath(double %arg) {
	; CHECK-LABEL: define double @test_libm_sqrt_f64_fpmath
	; CHECK-SAME: (double [[ARG:%.*]]) {
	; CHECK-NEXT: [[SQRT:%.*]] = tail call double @sqrt(double [[ARG]]), !fpmath [[META0]]
	; CHECK-NEXT: ret double [[SQRT]]
	;
	%sqrt = tail call double @sqrt(double %arg), !fpmath !0
	ret double %sqrt
	}

	; Attribute groups: #0 is applied at call sites, #1 on function definitions, #2 on the libm declarations.
	attributes #0 = { nobuiltin }
	attributes #1 = { "no-builtins" }
	attributes #2 = { nounwind memory(none) }

	; Metadata nodes: !0 is the operand of every !fpmath attachment in this file, !1 the operand of !foo.
	!0 = !{float 3.000000e+00}
	!1 = !{i32 1234}