; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=sleefgnuabi < %s | FileCheck %s -check-prefix=SLEEF
; RUN: llc -mtriple=aarch64-gnu-linux -mattr=+neon,+sve -vector-library=ArmPL < %s | FileCheck %s -check-prefix=ARMPL
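
; Checks that the llvm.modf.* intrinsics are lowered to calls into the selected
; vector math library: SLEEF GNU-ABI routines (_ZGVnN*/_ZGVsNx*) or ArmPL
; routines (armpl_v*/armpl_sv*), for both fixed-width (NEON) and scalable (SVE)
; vector types. The fractional part is returned in the vector register and the
; integral part is stored through the pointer argument.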
define <4 x float> @test_modf_v4f32(<4 x float> %x, ptr %out_integral) {
; SLEEF-LABEL: test_modf_v4f32:
; SLEEF: // %bb.0:
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: bl _ZGVnN4vl4_modff
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
;
; ARMPL-LABEL: test_modf_v4f32:
; ARMPL: // %bb.0:
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: bl armpl_vmodfq_f32
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
%result = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %x)
%result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
%result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
store <4 x float> %result.1, ptr %out_integral, align 4
ret <4 x float> %result.0
}
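
; Double-precision fixed-width case: lowered to SLEEF's _ZGVnN2vl8_modf and
; ArmPL's armpl_vmodfq_f64.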
define <2 x double> @test_modf_v2f64(<2 x double> %x, ptr %out_integral) {
; SLEEF-LABEL: test_modf_v2f64:
; SLEEF: // %bb.0:
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: bl _ZGVnN2vl8_modf
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
;
; ARMPL-LABEL: test_modf_v2f64:
; ARMPL: // %bb.0:
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: bl armpl_vmodfq_f64
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
%result = call { <2 x double>, <2 x double> } @llvm.modf.v2f64(<2 x double> %x)
%result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
%result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
store <2 x double> %result.1, ptr %out_integral, align 8
ret <2 x double> %result.0
}
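
; Scalable (SVE) single-precision case. SLEEF uses the _ZGVsNxvl4_modff entry
; point; for ArmPL an all-true predicate (ptrue p0.s) is materialised before
; the call to armpl_svmodf_f32_x.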
define <vscale x 4 x float> @test_modf_nxv4f32(<vscale x 4 x float> %x, ptr %out_integral) {
; SLEEF-LABEL: test_modf_nxv4f32:
; SLEEF: // %bb.0:
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: bl _ZGVsNxvl4_modff
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
;
; ARMPL-LABEL: test_modf_nxv4f32:
; ARMPL: // %bb.0:
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: ptrue p0.s
; ARMPL-NEXT: bl armpl_svmodf_f32_x
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
%result = call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.modf.nxv4f32(<vscale x 4 x float> %x)
%result.0 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 0
%result.1 = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } %result, 1
store <vscale x 4 x float> %result.1, ptr %out_integral, align 4
ret <vscale x 4 x float> %result.0
}
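
; Scalable (SVE) double-precision case, lowered to _ZGVsNxvl8_modf and
; armpl_svmodf_f64_x respectively.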
define <vscale x 2 x double> @test_modf_nxv2f64(<vscale x 2 x double> %x, ptr %out_integral) {
; SLEEF-LABEL: test_modf_nxv2f64:
; SLEEF: // %bb.0:
; SLEEF-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 16
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: bl _ZGVsNxvl8_modf
; SLEEF-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; SLEEF-NEXT: ret
;
; ARMPL-LABEL: test_modf_nxv2f64:
; ARMPL: // %bb.0:
; ARMPL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 16
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: ptrue p0.d
; ARMPL-NEXT: bl armpl_svmodf_f64_x
; ARMPL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; ARMPL-NEXT: ret
%result = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.modf.nxv2f64(<vscale x 2 x double> %x)
%result.0 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 0
%result.1 = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } %result, 1
store <vscale x 2 x double> %result.1, ptr %out_integral, align 8
ret <vscale x 2 x double> %result.0
}
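
; The store of the integral part is folded into the library call, which writes
; it through %out_integral (x0). Check that the load of the original value is
; kept before the call so it still reads the pre-call contents.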
define <4 x float> @modf_store_merging_load_before_store(<4 x float> %x, ptr %out_integral) {
; SLEEF-LABEL: modf_store_merging_load_before_store:
; SLEEF: // %bb.0:
; SLEEF-NEXT: sub sp, sp, #32
; SLEEF-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; SLEEF-NEXT: .cfi_def_cfa_offset 32
; SLEEF-NEXT: .cfi_offset w30, -16
; SLEEF-NEXT: ldr q1, [x0]
; SLEEF-NEXT: str q1, [sp] // 16-byte Folded Spill
; SLEEF-NEXT: bl _ZGVnN4vl4_modff
; SLEEF-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; SLEEF-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; SLEEF-NEXT: fadd v0.4s, v1.4s, v0.4s
; SLEEF-NEXT: add sp, sp, #32
; SLEEF-NEXT: ret
;
; ARMPL-LABEL: modf_store_merging_load_before_store:
; ARMPL: // %bb.0:
; ARMPL-NEXT: sub sp, sp, #32
; ARMPL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; ARMPL-NEXT: .cfi_def_cfa_offset 32
; ARMPL-NEXT: .cfi_offset w30, -16
; ARMPL-NEXT: ldr q1, [x0]
; ARMPL-NEXT: str q1, [sp] // 16-byte Folded Spill
; ARMPL-NEXT: bl armpl_vmodfq_f32
; ARMPL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; ARMPL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; ARMPL-NEXT: fadd v0.4s, v1.4s, v0.4s
; ARMPL-NEXT: add sp, sp, #32
; ARMPL-NEXT: ret
%result = call { <4 x float>, <4 x float> } @llvm.modf.v4f32(<4 x float> %x)
%result.0 = extractvalue { <4 x float>, <4 x float> } %result, 0
%result.1 = extractvalue { <4 x float>, <4 x float> } %result, 1
%original_integral = load <4 x float>, ptr %out_integral, align 4
store <4 x float> %result.1, ptr %out_integral, align 4
%return = fadd <4 x float> %original_integral, %result.0
ret <4 x float> %return
}