[libclc] Enable `clang fp reciprocal` in clc_native_divide/recip/rsqrt/tan (#149269)
The pragma adds the `arcp` flag to the `fdiv` instructions in these functions.
The flag allows a division to be computed using the reciprocal of the divisor, which can provide better performance.
diff --git a/libclc/clc/lib/generic/math/clc_native_divide.inc b/libclc/clc/lib/generic/math/clc_native_divide.inc
index fdf1794..dac176f 100644
--- a/libclc/clc/lib/generic/math/clc_native_divide.inc
+++ b/libclc/clc/lib/generic/math/clc_native_divide.inc
@@ -8,5 +8,6 @@
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_divide(__CLC_GENTYPE x,
__CLC_GENTYPE y) {
+ _Pragma("clang fp reciprocal(on)"); // Adds the arcp flag to the x / y fdiv.
return x / y;
}
diff --git a/libclc/clc/lib/generic/math/clc_native_recip.inc b/libclc/clc/lib/generic/math/clc_native_recip.inc
index 57eb35a9..e7246dc 100644
--- a/libclc/clc/lib/generic/math/clc_native_recip.inc
+++ b/libclc/clc/lib/generic/math/clc_native_recip.inc
@@ -7,5 +7,6 @@
//===----------------------------------------------------------------------===//
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_recip(__CLC_GENTYPE val) {
+ _Pragma("clang fp reciprocal(on)"); // Adds the arcp flag to the fdiv below.
return 1.0f / val;
}
diff --git a/libclc/clc/lib/generic/math/clc_native_rsqrt.inc b/libclc/clc/lib/generic/math/clc_native_rsqrt.inc
index 7a3b0b2..2b2c4bd 100644
--- a/libclc/clc/lib/generic/math/clc_native_rsqrt.inc
+++ b/libclc/clc/lib/generic/math/clc_native_rsqrt.inc
@@ -7,5 +7,6 @@
//===----------------------------------------------------------------------===//
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_rsqrt(__CLC_GENTYPE val) {
+ _Pragma("clang fp reciprocal(on)"); // Adds the arcp flag to the fdiv below.
return 1.0f / __clc_native_sqrt(val);
}
diff --git a/libclc/clc/lib/generic/math/clc_native_tan.inc b/libclc/clc/lib/generic/math/clc_native_tan.inc
index f61a789..f0c6c6d 100644
--- a/libclc/clc/lib/generic/math/clc_native_tan.inc
+++ b/libclc/clc/lib/generic/math/clc_native_tan.inc
@@ -7,5 +7,6 @@
//===----------------------------------------------------------------------===//
_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_native_tan(__CLC_GENTYPE val) {
+ _Pragma("clang fp reciprocal(on)"); // Adds the arcp flag to the sin/cos fdiv.
return __clc_native_sin(val) / __clc_native_cos(val);
}