test/CodeGen/PowerPC/powf_massv_075_025exp.ll - llvm-project/llvm - Git at Google

 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 | FileCheck -check-prefixes=CHECK-PWR9 %s
 ; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 | FileCheck -check-prefixes=CHECK-PWR8 %s

 ; Exponent is a variable
 define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_var
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %z, i64 %index
   %next.gep31 = getelementptr float, float* %y, i64 %index
   %next.gep32 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep32 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = bitcast float* %next.gep31 to <4 x float>*
   %wide.load33 = load <4 x float>, <4 x float>* %1, align 4
   %2 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> %wide.load33)
   %3 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %2, <4 x float>* %3, align 4
   %index.next = add i64 %index, 4
   %4 = icmp eq i64 %index.next, 1024
   br i1 %4, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is a constant != 0.75 and !=0.25
 define void @vspow_const(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_const
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is a constant != 0.75 and !=0.25 and they are different
 define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_neq_const
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is a constant != 0.75 and !=0.25
 define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_neq075_const
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is a constant != 0.75 and !=0.25
 define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_neq025_const
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is 0.75
 define void @vspow_075(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_075
 ; CHECK-NOT:         bl __powf4_P{{[8,9]}}
 ; CHECK:             xvrsqrtesp
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is 0.25
 define void @vspow_025(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_025
 ; CHECK-NOT:         bl __powf4_P{{[8,9]}}
 ; CHECK:             xvrsqrtesp
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is 0.75 but no proper fast-math flags
 define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_075_nofast
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK-NOT:         xvrsqrtesp
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Exponent is 0.25 but no proper fast-math flags
 define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x)  {
 ; CHECK-LABEL:       @vspow_025_nofast
 ; CHECK-PWR9:        bl __powf4_P9
 ; CHECK-PWR8:        bl __powf4_P8
 ; CHECK-NOT:         xvrsqrtesp
 ; CHECK:             blr
 entry:
   br label %vector.body

 vector.body:
   %index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
   %next.gep = getelementptr float, float* %y, i64 %index
   %next.gep19 = getelementptr float, float* %x, i64 %index
   %0 = bitcast float* %next.gep19 to <4 x float>*
   %wide.load = load <4 x float>, <4 x float>* %0, align 4
   %1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
   %2 = bitcast float* %next.gep to <4 x float>*
   store <4 x float> %1, <4 x float>* %2, align 4
   %index.next = add i64 %index, 4
   %3 = icmp eq i64 %index.next, 1024
   br i1 %3, label %for.end, label %vector.body

 for.end:
   ret void
 }

 ; Function Attrs: nounwind readnone speculatable willreturn
 declare <4 x float> @__powf4_P8(<4 x float>, <4 x float>)
	; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \| FileCheck -check-prefixes=CHECK-PWR9 %s
	; RUN: llc -vector-library=MASSV < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 \| FileCheck -check-prefixes=CHECK-PWR8 %s

	; Exponent is a variable
	define void @vspow_var(float* nocapture %z, float* nocapture readonly %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_var
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %z, i64 %index
	%next.gep31 = getelementptr float, float* %y, i64 %index
	%next.gep32 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep32 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = bitcast float* %next.gep31 to <4 x float>*
	%wide.load33 = load <4 x float>, <4 x float>* %1, align 4
	%2 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> %wide.load33)
	%3 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %2, <4 x float>* %3, align 4
	%index.next = add i64 %index, 4
	%4 = icmp eq i64 %index.next, 1024
	br i1 %4, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is a constant != 0.75 and !=0.25
	define void @vspow_const(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_const
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is a constant != 0.75 and !=0.25 and they are different
	define void @vspow_neq_const(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_neq_const
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is a constant != 0.75 and !=0.25
	define void @vspow_neq075_const(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_neq075_const
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is a constant != 0.75 and !=0.25
	define void @vspow_neq025_const(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_neq025_const
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is 0.75
	define void @vspow_075(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_075
	; CHECK-NOT: bl __powf4_P{{[8,9]}}
	; CHECK: xvrsqrtesp
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is 0.25
	define void @vspow_025(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_025
	; CHECK-NOT: bl __powf4_P{{[8,9]}}
	; CHECK: xvrsqrtesp
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call ninf afn nsz <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is 0.75 but no proper fast-math flags
	define void @vspow_075_nofast(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_075_nofast
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK-NOT: xvrsqrtesp
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Exponent is 0.25 but no proper fast-math flags
	define void @vspow_025_nofast(float* nocapture %y, float* nocapture readonly %x) {
	; CHECK-LABEL: @vspow_025_nofast
	; CHECK-PWR9: bl __powf4_P9
	; CHECK-PWR8: bl __powf4_P8
	; CHECK-NOT: xvrsqrtesp
	; CHECK: blr
	entry:
	br label %vector.body

	vector.body:
	%index = phi i64 [ %index.next, %vector.body ], [ 0, %entry ]
	%next.gep = getelementptr float, float* %y, i64 %index
	%next.gep19 = getelementptr float, float* %x, i64 %index
	%0 = bitcast float* %next.gep19 to <4 x float>*
	%wide.load = load <4 x float>, <4 x float>* %0, align 4
	%1 = call <4 x float> @__powf4_P8(<4 x float> %wide.load, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
	%2 = bitcast float* %next.gep to <4 x float>*
	store <4 x float> %1, <4 x float>* %2, align 4
	%index.next = add i64 %index, 4
	%3 = icmp eq i64 %index.next, 1024
	br i1 %3, label %for.end, label %vector.body

	for.end:
	ret void
	}

	; Function Attrs: nounwind readnone speculatable willreturn
	declare <4 x float> @__powf4_P8(<4 x float>, <4 x float>)