; test/CodeGen/RISCV/bfloat.ll - llvm-project/llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64

 ; TODO: Enable codegen for hard float.

 ; Truncates a float argument to bfloat (fptrunc). On both targets the
 ; expected output is a call to the __truncsfbf2 libcall, with ra spilled
 ; to the stack before the call and reloaded after it.
 define bfloat @float_to_bfloat(float %a) nounwind {
 ; RV32I-ILP32-LABEL: float_to_bfloat:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    call __truncsfbf2@plt
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: float_to_bfloat:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    call __truncsfbf2@plt
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fptrunc float %a to bfloat
   ret bfloat %1
 }

 ; Truncates a double argument to bfloat (fptrunc). On both targets the
 ; expected output is a single call to the __truncdfbf2 libcall, with ra
 ; spilled before the call and reloaded after it.
 define bfloat @double_to_bfloat(double %a) nounwind {
 ; RV32I-ILP32-LABEL: double_to_bfloat:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    call __truncdfbf2@plt
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: double_to_bfloat:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    call __truncdfbf2@plt
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fptrunc double %a to bfloat
   ret bfloat %1
 }

 ; Extends a bfloat argument to float (fpext). No libcall is expected:
 ; the output is a single left shift of a0 by 16 (slli on riscv32,
 ; slliw on riscv64) followed by ret.
 define float @bfloat_to_float(bfloat %a) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_to_float:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    slli a0, a0, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_to_float:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    slliw a0, a0, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fpext bfloat %a to float
   ret float %1
 }

 ; Extends a bfloat argument to double (fpext). The expected output first
 ; shifts a0 left by 16 (the same shift used for bfloat -> float) and then
 ; calls the __extendsfdf2 libcall on the result.
 define double @bfloat_to_double(bfloat %a) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_to_double:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    slli a0, a0, 16
 ; RV32I-ILP32-NEXT:    call __extendsfdf2@plt
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_to_double:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    slliw a0, a0, 16
 ; RV64I-LP64-NEXT:    call __extendsfdf2@plt
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fpext bfloat %a to double
   ret double %1
 }

 ; Bitcasts an i16 argument to bfloat. The expected output on both
 ; targets is a bare ret, i.e. no instructions are emitted for the cast.
 define bfloat @i16_to_bfloat(i16 %a) nounwind {
 ; RV32I-ILP32-LABEL: i16_to_bfloat:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: i16_to_bfloat:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    ret
   %1 = bitcast i16 %a to bfloat
   ret bfloat %1
 }

 ; Bitcasts a bfloat argument to i16. The expected output on both
 ; targets is a bare ret, i.e. no instructions are emitted for the cast.
 define i16 @bfloat_to_i16(bfloat %a) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_to_i16:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_to_i16:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    ret
   %1 = bitcast bfloat %a to i16
   ret i16 %1
 }

 ; Adds two bfloat arguments (fadd). The expected output shifts both
 ; operands left by 16, calls the __addsf3 libcall on them, and then
 ; passes the result to the __truncsfbf2 libcall before returning.
 define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_add:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    slli a0, a0, 16
 ; RV32I-ILP32-NEXT:    slli a1, a1, 16
 ; RV32I-ILP32-NEXT:    call __addsf3@plt
 ; RV32I-ILP32-NEXT:    call __truncsfbf2@plt
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_add:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    slliw a0, a0, 16
 ; RV64I-LP64-NEXT:    slliw a1, a1, 16
 ; RV64I-LP64-NEXT:    call __addsf3@plt
 ; RV64I-LP64-NEXT:    call __truncsfbf2@plt
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fadd bfloat %a, %b
   ret bfloat %1
 }

 ; Loads two bfloat values through %a (element 0 and element index 3) and
 ; returns their sum. The expected output uses lh at byte offsets 0 and 6,
 ; shifts each loaded value left by 16, then follows the same
 ; __addsf3 / __truncsfbf2 libcall sequence as a plain bfloat fadd.
 define bfloat @bfloat_load(ptr %a) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_load:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    lh a1, 0(a0)
 ; RV32I-ILP32-NEXT:    lh a2, 6(a0)
 ; RV32I-ILP32-NEXT:    slli a0, a1, 16
 ; RV32I-ILP32-NEXT:    slli a1, a2, 16
 ; RV32I-ILP32-NEXT:    call __addsf3@plt
 ; RV32I-ILP32-NEXT:    call __truncsfbf2@plt
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_load:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    lh a1, 0(a0)
 ; RV64I-LP64-NEXT:    lh a2, 6(a0)
 ; RV64I-LP64-NEXT:    slliw a0, a1, 16
 ; RV64I-LP64-NEXT:    slliw a1, a2, 16
 ; RV64I-LP64-NEXT:    call __addsf3@plt
 ; RV64I-LP64-NEXT:    call __truncsfbf2@plt
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = load bfloat, ptr %a
   %2 = getelementptr bfloat, ptr %a, i32 3
   %3 = load bfloat, ptr %2
   %4 = fadd bfloat %1, %3
   ret bfloat %4
 }

 ; Adds %b and %c as bfloat and stores the sum twice through %a: at
 ; element 0 and at element index 8. In the expected output the pointer is
 ; copied to s0 so it survives the __addsf3 / __truncsfbf2 libcalls (s0 is
 ; spilled and reloaded alongside ra), and the two stores are sh at byte
 ; offsets 0 and 16.
 define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
 ; RV32I-ILP32-LABEL: bfloat_store:
 ; RV32I-ILP32:       # %bb.0:
 ; RV32I-ILP32-NEXT:    addi sp, sp, -16
 ; RV32I-ILP32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-ILP32-NEXT:    mv s0, a0
 ; RV32I-ILP32-NEXT:    slli a0, a1, 16
 ; RV32I-ILP32-NEXT:    slli a1, a2, 16
 ; RV32I-ILP32-NEXT:    call __addsf3@plt
 ; RV32I-ILP32-NEXT:    call __truncsfbf2@plt
 ; RV32I-ILP32-NEXT:    sh a0, 0(s0)
 ; RV32I-ILP32-NEXT:    sh a0, 16(s0)
 ; RV32I-ILP32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-ILP32-NEXT:    addi sp, sp, 16
 ; RV32I-ILP32-NEXT:    ret
 ;
 ; RV64I-LP64-LABEL: bfloat_store:
 ; RV64I-LP64:       # %bb.0:
 ; RV64I-LP64-NEXT:    addi sp, sp, -16
 ; RV64I-LP64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64I-LP64-NEXT:    mv s0, a0
 ; RV64I-LP64-NEXT:    slliw a0, a1, 16
 ; RV64I-LP64-NEXT:    slliw a1, a2, 16
 ; RV64I-LP64-NEXT:    call __addsf3@plt
 ; RV64I-LP64-NEXT:    call __truncsfbf2@plt
 ; RV64I-LP64-NEXT:    sh a0, 0(s0)
 ; RV64I-LP64-NEXT:    sh a0, 16(s0)
 ; RV64I-LP64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-LP64-NEXT:    addi sp, sp, 16
 ; RV64I-LP64-NEXT:    ret
   %1 = fadd bfloat %b, %c
   store bfloat %1, ptr %a
   %2 = getelementptr bfloat, ptr %a, i32 8
   store bfloat %1, ptr %2
   ret void
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32I-ILP32
	; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV64I-LP64

	; TODO: Enable codegen for hard float.

	; Truncates a float argument to bfloat (fptrunc). On both targets the
	; expected output is a call to the __truncsfbf2 libcall, with ra spilled
	; to the stack before the call and reloaded after it.
	; NOTE(review): @float_to_bfloat is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define bfloat @float_to_bfloat(float %a) nounwind {
	; RV32I-ILP32-LABEL: float_to_bfloat:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: call __truncsfbf2@plt
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: float_to_bfloat:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: call __truncsfbf2@plt
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = fptrunc float %a to bfloat
	ret bfloat %1
	}

	; Truncates a double argument to bfloat (fptrunc). On both targets the
	; expected output is a single call to the __truncdfbf2 libcall, with ra
	; spilled before the call and reloaded after it.
	; NOTE(review): @double_to_bfloat is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define bfloat @double_to_bfloat(double %a) nounwind {
	; RV32I-ILP32-LABEL: double_to_bfloat:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: call __truncdfbf2@plt
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: double_to_bfloat:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: call __truncdfbf2@plt
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = fptrunc double %a to bfloat
	ret bfloat %1
	}

	; Extends a bfloat argument to float (fpext). No libcall is expected:
	; the output is a single left shift of a0 by 16 (slli on riscv32,
	; slliw on riscv64) followed by ret.
	; NOTE(review): @bfloat_to_float is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define float @bfloat_to_float(bfloat %a) nounwind {
	; RV32I-ILP32-LABEL: bfloat_to_float:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: slli a0, a0, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_to_float:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: slliw a0, a0, 16
	; RV64I-LP64-NEXT: ret
	%1 = fpext bfloat %a to float
	ret float %1
	}

	; Extends a bfloat argument to double (fpext). The expected output first
	; shifts a0 left by 16 (the same shift used for bfloat -> float) and then
	; calls the __extendsfdf2 libcall on the result.
	; NOTE(review): @bfloat_to_double is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define double @bfloat_to_double(bfloat %a) nounwind {
	; RV32I-ILP32-LABEL: bfloat_to_double:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: slli a0, a0, 16
	; RV32I-ILP32-NEXT: call __extendsfdf2@plt
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_to_double:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: slliw a0, a0, 16
	; RV64I-LP64-NEXT: call __extendsfdf2@plt
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = fpext bfloat %a to double
	ret double %1
	}

	; Bitcasts an i16 argument to bfloat. The expected output on both
	; targets is a bare ret, i.e. no instructions are emitted for the cast.
	; NOTE(review): @i16_to_bfloat is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define bfloat @i16_to_bfloat(i16 %a) nounwind {
	; RV32I-ILP32-LABEL: i16_to_bfloat:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: i16_to_bfloat:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: ret
	%1 = bitcast i16 %a to bfloat
	ret bfloat %1
	}

	; Bitcasts a bfloat argument to i16. The expected output on both
	; targets is a bare ret, i.e. no instructions are emitted for the cast.
	; NOTE(review): @bfloat_to_i16 is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define i16 @bfloat_to_i16(bfloat %a) nounwind {
	; RV32I-ILP32-LABEL: bfloat_to_i16:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_to_i16:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: ret
	%1 = bitcast bfloat %a to i16
	ret i16 %1
	}

	; Adds two bfloat arguments (fadd). The expected output shifts both
	; operands left by 16, calls the __addsf3 libcall on them, and then
	; passes the result to the __truncsfbf2 libcall before returning.
	; NOTE(review): @bfloat_add is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define bfloat @bfloat_add(bfloat %a, bfloat %b) nounwind {
	; RV32I-ILP32-LABEL: bfloat_add:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: slli a0, a0, 16
	; RV32I-ILP32-NEXT: slli a1, a1, 16
	; RV32I-ILP32-NEXT: call __addsf3@plt
	; RV32I-ILP32-NEXT: call __truncsfbf2@plt
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_add:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: slliw a0, a0, 16
	; RV64I-LP64-NEXT: slliw a1, a1, 16
	; RV64I-LP64-NEXT: call __addsf3@plt
	; RV64I-LP64-NEXT: call __truncsfbf2@plt
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = fadd bfloat %a, %b
	ret bfloat %1
	}

	; Loads two bfloat values through %a (element 0 and element index 3) and
	; returns their sum. The expected output uses lh at byte offsets 0 and 6,
	; shifts each loaded value left by 16, then follows the same
	; __addsf3 / __truncsfbf2 libcall sequence as a plain bfloat fadd.
	; NOTE(review): @bfloat_load is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define bfloat @bfloat_load(ptr %a) nounwind {
	; RV32I-ILP32-LABEL: bfloat_load:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: lh a1, 0(a0)
	; RV32I-ILP32-NEXT: lh a2, 6(a0)
	; RV32I-ILP32-NEXT: slli a0, a1, 16
	; RV32I-ILP32-NEXT: slli a1, a2, 16
	; RV32I-ILP32-NEXT: call __addsf3@plt
	; RV32I-ILP32-NEXT: call __truncsfbf2@plt
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_load:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: lh a1, 0(a0)
	; RV64I-LP64-NEXT: lh a2, 6(a0)
	; RV64I-LP64-NEXT: slliw a0, a1, 16
	; RV64I-LP64-NEXT: slliw a1, a2, 16
	; RV64I-LP64-NEXT: call __addsf3@plt
	; RV64I-LP64-NEXT: call __truncsfbf2@plt
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = load bfloat, ptr %a
	%2 = getelementptr bfloat, ptr %a, i32 3
	%3 = load bfloat, ptr %2
	%4 = fadd bfloat %1, %3
	ret bfloat %4
	}

	; Adds %b and %c as bfloat and stores the sum twice through %a: at
	; element 0 and at element index 8. In the expected output the pointer is
	; copied to s0 so it survives the __addsf3 / __truncsfbf2 libcalls (s0 is
	; spilled and reloaded alongside ra), and the two stores are sh at byte
	; offsets 0 and 16.
	; NOTE(review): @bfloat_store is also defined earlier in this file;
	; confirm whether this second definition is intended.
	define void @bfloat_store(ptr %a, bfloat %b, bfloat %c) nounwind {
	; RV32I-ILP32-LABEL: bfloat_store:
	; RV32I-ILP32: # %bb.0:
	; RV32I-ILP32-NEXT: addi sp, sp, -16
	; RV32I-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
	; RV32I-ILP32-NEXT: mv s0, a0
	; RV32I-ILP32-NEXT: slli a0, a1, 16
	; RV32I-ILP32-NEXT: slli a1, a2, 16
	; RV32I-ILP32-NEXT: call __addsf3@plt
	; RV32I-ILP32-NEXT: call __truncsfbf2@plt
	; RV32I-ILP32-NEXT: sh a0, 0(s0)
	; RV32I-ILP32-NEXT: sh a0, 16(s0)
	; RV32I-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
	; RV32I-ILP32-NEXT: addi sp, sp, 16
	; RV32I-ILP32-NEXT: ret
	;
	; RV64I-LP64-LABEL: bfloat_store:
	; RV64I-LP64: # %bb.0:
	; RV64I-LP64-NEXT: addi sp, sp, -16
	; RV64I-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill
	; RV64I-LP64-NEXT: mv s0, a0
	; RV64I-LP64-NEXT: slliw a0, a1, 16
	; RV64I-LP64-NEXT: slliw a1, a2, 16
	; RV64I-LP64-NEXT: call __addsf3@plt
	; RV64I-LP64-NEXT: call __truncsfbf2@plt
	; RV64I-LP64-NEXT: sh a0, 0(s0)
	; RV64I-LP64-NEXT: sh a0, 16(s0)
	; RV64I-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload
	; RV64I-LP64-NEXT: addi sp, sp, 16
	; RV64I-LP64-NEXT: ret
	%1 = fadd bfloat %b, %c
	store bfloat %1, ptr %a
	%2 = getelementptr bfloat, ptr %a, i32 8
	store bfloat %1, ptr %2
	ret void
	}