[PowerPC] Provide XL-compatible vec_round implementation The XL implementation of vec_round for vector double uses "round-to-nearest, ties to even" just as the vector float version does. However clang and gcc use "round-to-nearest-away" for vector double and "round-to-nearest, ties to even" for vector float. The XL behaviour is implemented under the __XL_COMPAT_ALTIVEC__ macro similarly to other instances of incompatibility. Differential revision: https://reviews.llvm.org/D113642 GitOrigin-RevId: b7bf937bbee38c2db0c0640176ef618d9c746538

commit: 8faaea4004a9d717d1c4c543cca5999cb7a49665 [log] [tgz]
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> Wed Nov 24 04:34:01 2021 -0600
committer: Copybara-Service <copybara-worker@google.com> Wed Nov 24 04:46:39 2021 -0800
tree: f21b37e21fe9c46d7e572a8014064076e1f4fc32
parent: 1d6f72b89e53e0b71d00a230446206e385d4dbf5 [diff]
diff --git a/lib/Headers/altivec.h b/lib/Headers/altivec.h
index fb808d7..3366e1f 100644
--- a/lib/Headers/altivec.h
+++ b/lib/Headers/altivec.h

@@ -8413,9 +8413,20 @@
 }
 
 #ifdef __VSX__
+#ifdef __XL_COMPAT_ALTIVEC__
+static __inline__ vector double __ATTRS_o_ai vec_rint(vector double __a);
+static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
+  double __fpscr = __builtin_readflm();
+  __builtin_setrnd(0);
+  vector double __rounded = vec_rint(__a);
+  __builtin_setflm(__fpscr);
+  return __rounded;
+}
+#else
 static __inline__ vector double __ATTRS_o_ai vec_round(vector double __a) {
   return __builtin_vsx_xvrdpi(__a);
 }
+#endif
 
 /* vec_rint */
 

diff --git a/test/CodeGen/builtins-ppc-vsx.c b/test/CodeGen/builtins-ppc-vsx.c
index da16124..b0028e9 100644
--- a/test/CodeGen/builtins-ppc-vsx.c
+++ b/test/CodeGen/builtins-ppc-vsx.c

@@ -409,10 +409,6 @@
 // CHECK: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
 // CHECK-LE: call <4 x float> @llvm.ppc.altivec.vrfin(<4 x float>
 
-  res_vd = vec_round(vd);
-// CHECK: call <2 x double> @llvm.round.v2f64(<2 x double>
-// CHECK-LE: call <2 x double> @llvm.round.v2f64(<2 x double>
-
   res_vd = vec_perm(vd, vd, vuc);
 // CHECK: @llvm.ppc.altivec.vperm
 // CHECK-LE: @llvm.ppc.altivec.vperm

diff --git a/test/CodeGen/builtins-ppc-xlcompat.c b/test/CodeGen/builtins-ppc-xlcompat.c
index 7d350fb..5c27b9d 100644
--- a/test/CodeGen/builtins-ppc-xlcompat.c
+++ b/test/CodeGen/builtins-ppc-xlcompat.c

@@ -5,11 +5,16 @@
 // RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
 // RUN:   -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
 // RUN:   -D__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck %s
+// RUN: %clang_cc1 -target-feature +altivec -target-feature +vsx \
+// RUN:   -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - \
+// RUN:   -U__XL_COMPAT_ALTIVEC__ -target-cpu pwr8 | FileCheck \
+// RUN:   --check-prefix=NOCOMPAT %s
 #include <altivec.h>
 vector double vd = { 3.4e22, 1.8e-3 };
 vector signed long long vsll = { -12345678999ll, 12345678999 };
 vector unsigned long long vull = { 11547229456923630743llu, 18014402265226391llu };
 vector float res_vf;
+vector double res_vd;
 vector signed int res_vsi;
 vector unsigned int res_vui;
 
@@ -38,4 +43,11 @@
 // CHECK:         [[TMP8:%.*]] = load <2 x double>, <2 x double>* @vd, align 16
 // CHECK-NEXT:    fmul <2 x double> [[TMP8]], <double 1.600000e+01, double 1.600000e+01>
 // CHECK:         call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double>
+
+  res_vd = vec_round(vd);
+// CHECK:         call double @llvm.ppc.readflm()
+// CHECK:         call double @llvm.ppc.setrnd(i32 0)
+// CHECK:         call <2 x double> @llvm.rint.v2f64(<2 x double>
+// CHECK:         call double @llvm.ppc.setflm(double
+// NOCOMPAT:      call <2 x double> @llvm.round.v2f64(<2 x double>
 }
commit	8faaea4004a9d717d1c4c543cca5999cb7a49665	[log] [tgz]
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	Wed Nov 24 04:34:01 2021 -0600
committer	Copybara-Service <copybara-worker@google.com>	Wed Nov 24 04:46:39 2021 -0800
tree	f21b37e21fe9c46d7e572a8014064076e1f4fc32
parent	1d6f72b89e53e0b71d00a230446206e385d4dbf5 [diff]