| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 |
| ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s |
| ; RUN: opt -S -mtriple=amdgcn-- -mcpu=tahiti -passes=amdgpu-codegenprepare -denormal-fp-math-f32=ieee %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-BADFREXP %s |
| ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=dynamic %s | FileCheck -check-prefixes=CHECK,IEEE,IEEE-GOODFREXP %s |
| ; RUN: opt -S -mtriple=amdgcn-- -mcpu=hawaii -passes=amdgpu-codegenprepare -denormal-fp-math-f32=preserve-sign %s | FileCheck -check-prefixes=CHECK,DAZ %s |
| |
| ; Make sure this doesn't crash with no triple |
| ; TODO: Delete this once the old pass manager (PM) is removed. |
| ; RUN: opt -amdgpu-codegenprepare -disable-output %s |
| |
| |
| ; Kernel with a single f32 fdiv carrying !fpmath !0, defined with attribute |
| ; group #0. The expected output (shared by every check prefix) keeps the fdiv |
| ; and its !fpmath metadata untouched. |
| ; NOTE(review): attribute group #0 and metadata node !0 are defined outside this |
| ; view; the "noop" name suggests #0 is what suppresses the expansion -- confirm. |
define amdgpu_kernel void @noop_fdiv_fpmath(ptr addrspace(1) %out, float %a, float %b) #0 { |
| ; CHECK-LABEL: define amdgpu_kernel void @noop_fdiv_fpmath( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[MD_25ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META0:![0-9]+]] |
| ; CHECK-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| %md.25ulp = fdiv float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Exercises scalar f32 fdiv with different !fpmath metadata nodes and fast-math |
| ; flags. Each result is stored with a volatile store to %out. |
| ; What the expected output below shows: |
| ;  - fdiv without !fpmath (plain or arcp) and fdiv with !fpmath !1 stay as fdiv. |
| ;  - fdiv with !fpmath !2 is expanded to frexp / rcp / fmul / ldexp for every prefix. |
| ;  - fdiv with !fpmath !0 or !3 gets the same expansion for the IEEE prefixes, but |
| ;    becomes a call to llvm.amdgcn.fdiv.fast for the DAZ prefix. |
| ;  - fast / afn fdiv with !fpmath !0 stay as fdiv. |
| ;  - arcp fdiv with !fpmath !0 or !2 becomes %a times a reciprocal of %b; the IEEE |
| ;    prefixes scale through frexp / ldexp, the DAZ prefix calls rcp on %b directly. |
| ;  - The IEEE-BADFREXP prefix obtains exponents via llvm.amdgcn.frexp.exp instead |
| ;    of extracting field 1 of the llvm.frexp result. |
| ; NOTE(review): the !fpmath nodes are defined outside this view; the value names |
| ; suggest !1 = 0.5 ulp, !2 = 1 ulp, !0 = 2.5 ulp, !3 = 3 ulp -- confirm. |
define amdgpu_kernel void @fdiv_fpmath_f32(ptr addrspace(1) %out, float %a, float %b) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1:![0-9]+]] |
| ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; No !fpmath metadata: expected to stay an fdiv. |
| %no.md = fdiv float %a, %b |
| store volatile float %no.md, ptr addrspace(1) %out, align 4 |
| ; !fpmath !1: expected to stay an fdiv. |
| %md.half.ulp = fdiv float %a, %b, !fpmath !1 |
| store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 |
| ; !fpmath !2: expected to be expanded for every check prefix. |
| %md.1ulp = fdiv float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 |
| ; !fpmath !0 and !3: expanded for the IEEE prefixes, fdiv.fast for the DAZ prefix. |
| %md.25ulp = fdiv float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| %md.3ulp = fdiv float %a, %b, !fpmath !3 |
| store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 |
| ; fast / afn with !fpmath !0: expected to stay an fdiv. |
| %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 |
| store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 |
| %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 |
| store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 |
| ; arcp without !fpmath: expected to stay an fdiv. |
| %no.md.arcp = fdiv arcp float %a, %b |
| store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4 |
| ; arcp with !fpmath: expected to become %a multiplied by a reciprocal of %b. |
| %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 |
| store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 |
| %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 |
| store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Exercises f32 fdiv carrying nnan and/or ninf flags together with !fpmath !2 |
| ; or !fpmath !0. Each result is stored with a volatile store to %out. |
| ; What the expected output below shows: |
| ;  - The flags of the original fdiv reappear on the generated rcp, fmul and ldexp |
| ;    (or fdiv.fast) instructions; the llvm.frexp calls carry no flags. |
| ;  - For the IEEE prefixes all six divisions use the frexp / rcp / ldexp expansion. |
| ;  - For the DAZ prefix the !fpmath !2 divisions use that expansion, while the |
| ;    !fpmath !0 divisions become calls to llvm.amdgcn.fdiv.fast. |
| ; NOTE(review): the !fpmath nodes are defined outside this view; the value names |
| ; suggest !2 = 1 ulp and !0 = 2.5 ulp -- confirm. |
define amdgpu_kernel void @fdiv_fpmath_f32_flags(ptr addrspace(1) %out, float %a, float %b) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractvalue { float, i32 } [[TMP37]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP50]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul nnan ninf float [[TMP15]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul ninf float [[TMP24]], [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul ninf float [[TMP33]], [[TMP31]] |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractvalue { float, i32 } [[TMP37]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP38]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = fmul nnan float [[TMP42]], [[TMP40]] |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = sub i32 [[TMP43]], [[TMP39]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP44]], i32 [[TMP45]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP47]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = extractvalue { float, i32 } [[TMP50]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = fmul nnan float [[TMP51]], [[TMP49]] |
| ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = sub i32 [[TMP52]], [[TMP48]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP53]], i32 [[TMP54]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_flags( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul nnan ninf float [[TMP6]], [[TMP4]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; DAZ-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 |
| ; DAZ-NEXT: [[TMP13:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; DAZ-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 |
| ; DAZ-NEXT: [[TMP17:%.*]] = fmul ninf float [[TMP15]], [[TMP13]] |
| ; DAZ-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; DAZ-NEXT: [[TMP22:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; DAZ-NEXT: [[TMP26:%.*]] = fmul nnan float [[TMP24]], [[TMP22]] |
| ; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; ninf + nnan, first with !fpmath !2 and then with !fpmath !0. |
| %md.1ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.25ulp.ninf.nnan = fdiv ninf nnan float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp.ninf.nnan, ptr addrspace(1) %out, align 4 |
| |
| ; ninf only, first with !fpmath !2 and then with !fpmath !0. |
| %md.1ulp.ninf = fdiv ninf float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4 |
| |
| %md.25ulp.ninf = fdiv ninf float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp.ninf, ptr addrspace(1) %out, align 4 |
| |
| ; nnan only, first with !fpmath !2 and then with !fpmath !0. |
| %md.1ulp.nnan = fdiv nnan float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.25ulp.nnan = fdiv nnan float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp.nnan, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Reciprocal-shaped f32 fdiv (constant 1.0 or -1.0 numerator over %x) with |
| ; different !fpmath attachments and with the afn / fast flags. |
| ; Per the assertions below: |
| ;   - In the IEEE-prefixed runs, the unflagged fdivs carrying !fpmath !2 or !0 |
| ;     become llvm.frexp + llvm.amdgcn.rcp.f32 + llvm.ldexp. The IEEE-BADFREXP |
| ;     run takes the exponent from llvm.amdgcn.frexp.exp.i32.f32 instead of the |
| ;     second result of llvm.frexp. |
| ;   - In the DAZ run the same fdivs become a bare llvm.amdgcn.rcp.f32 call. |
| ;   - For a -1.0 numerator the expansion is applied to fneg %x. |
| ;   - Every other fdiv here (no metadata, !fpmath !1, afn, fast) is expected |
| ;     to be left as an fdiv. |
| ; NOTE(review): !0, !1 and !2 are defined outside this chunk; the value names |
| ; suggest 2.5 ulp, 0.5 ulp and 1 ulp respectively - confirm against the |
| ; metadata definitions elsewhere in the file. |
| define amdgpu_kernel void @rcp_fdiv_f32_fpmath(ptr addrspace(1) %out, float %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg float [[X]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fneg float [[X]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float 1.000000e+00, [[X]] |
| ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float 1.000000e+00, [[X]], !fpmath [[META1]] |
| ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn float 1.000000e+00, [[X]] |
| ; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_25ULP:%.*]] = fdiv afn float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[X]] |
| ; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_25ULP:%.*]] = fdiv fast float 1.000000e+00, [[X]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP1:%.*]] = fneg float [[X]] |
| ; DAZ-NEXT: [[NEG_MD_1ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP1]]) |
| ; DAZ-NEXT: store volatile float [[NEG_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP2:%.*]] = fneg float [[X]] |
| ; DAZ-NEXT: [[NEG_MD_25ULP:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: store volatile float [[NEG_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NEG_AFN_NO_MD:%.*]] = fdiv afn float -1.000000e+00, [[X]] |
| ; DAZ-NEXT: store volatile float [[NEG_AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NEG_AFN_25ULP:%.*]] = fdiv afn float -1.000000e+00, [[X]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[NEG_AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NEG_FAST_NO_MD:%.*]] = fdiv fast float -1.000000e+00, [[X]] |
| ; DAZ-NEXT: store volatile float [[NEG_FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Numerator 1.0 cases. Each result is stored with a volatile store to %out. |
| %no.md = fdiv float 1.000000e+00, %x |
| store volatile float %no.md, ptr addrspace(1) %out, align 4 |
| %md.1ulp = fdiv float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 |
| %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1 |
| store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 |
| %afn.no.md = fdiv afn float 1.000000e+00, %x |
| store volatile float %afn.no.md, ptr addrspace(1) %out, align 4 |
| %afn.25ulp = fdiv afn float 1.000000e+00, %x, !fpmath !0 |
| store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4 |
| %fast.no.md = fdiv fast float 1.000000e+00, %x |
| store volatile float %fast.no.md, ptr addrspace(1) %out, align 4 |
| %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0 |
| store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4 |
| ; Numerator -1.0 cases. |
| %neg.md.1ulp = fdiv float -1.000000e+00, %x, !fpmath !2 |
| store volatile float %neg.md.1ulp, ptr addrspace(1) %out, align 4 |
| %neg.md.25ulp = fdiv float -1.000000e+00, %x, !fpmath !0 |
| store volatile float %neg.md.25ulp, ptr addrspace(1) %out, align 4 |
| %neg.afn.no.md = fdiv afn float -1.000000e+00, %x |
| store volatile float %neg.afn.no.md, ptr addrspace(1) %out, align 4 |
| %neg.afn.25ulp = fdiv afn float -1.000000e+00, %x, !fpmath !0 |
| store volatile float %neg.afn.25ulp, ptr addrspace(1) %out, align 4 |
| %neg.fast.no.md = fdiv fast float -1.000000e+00, %x |
| store volatile float %neg.fast.no.md, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; 1.0 / %x with !fpmath !2, combined with the nnan, ninf and nsz fast-math |
| ; flags. The assertions expect each fdiv's flags to be carried onto the |
| ; generated llvm.amdgcn.rcp.f32 call (and onto the llvm.ldexp call in the |
| ; IEEE-prefixed runs), while the llvm.frexp calls stay unflagged. |
| define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags(ptr addrspace(1) %out, float %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = sub i32 0, [[TMP18]] |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_flags( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; One fdiv per flag combination; each result is stored with a volatile store. |
| %md.1ulp.ninf.nnan = fdiv ninf nnan float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.ninf = fdiv ninf float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp.ninf, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.nnan = fdiv nnan float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.nsz = fdiv nsz float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp.nsz, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / %x with !fpmath !2 where the denominator argument is annotated as |
| ; never nan (%no.nan), never inf (%no.inf), or neither (%no.inf.nan). |
| ; The assertions expect the same output shape for all three arguments: |
| ; llvm.frexp + llvm.amdgcn.rcp.f32 + llvm.ldexp in the IEEE-prefixed runs and |
| ; a bare llvm.amdgcn.rcp.f32 call in the DAZ run. |
| ; NOTE(review): %no.inf carries nofpclass(inf) so that it covers a case that |
| ; is distinct from %no.nan; rerun utils/update_test_checks.py to confirm the |
| ; assertions after touching the signature. |
| define amdgpu_kernel void @rcp_fdiv_f32_knownfinite(ptr addrspace(1) %out, |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NAN]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NAN]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_INF_NAN]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_INF_NAN]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_knownfinite( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[MD_1ULP_NO_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NAN]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NO_INF:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NO_INF_NAN:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_INF_NAN]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| float nofpclass(nan) %no.nan, |
| float nofpclass(inf) %no.inf, |
| float nofpclass(inf nan) %no.inf.nan) { |
| %md.1ulp.no.nan = fdiv float 1.000000e+00, %no.nan, !fpmath !2 |
| store volatile float %md.1ulp.no.nan, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.no.inf = fdiv float 1.000000e+00, %no.inf, !fpmath !2 |
| store volatile float %md.1ulp.no.inf, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.no.inf.nan = fdiv float 1.000000e+00, %no.inf.nan, !fpmath !2 |
| store volatile float %md.1ulp.no.inf.nan, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / %x with !fpmath !2 where the denominator argument is annotated |
| ; nofpclass(zero) (%no.zero) or nofpclass(zero sub) (%no.zero.sub). |
| ; The assertions expect the same output shape for both arguments: |
| ; llvm.frexp + llvm.amdgcn.rcp.f32 + llvm.ldexp in the IEEE-prefixed runs and |
| ; a bare llvm.amdgcn.rcp.f32 call in the DAZ run. |
| define amdgpu_kernel void @rcp_fdiv_f32_nozero(ptr addrspace(1) %out, |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_ZERO_SUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_ZERO_SUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nozero( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[MD_1ULP_NO_ZERO:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NO_ZERO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_ZERO_SUB]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| float nofpclass(zero) %no.zero, |
| float nofpclass(zero sub) %no.zero.sub) { |
| %md.1ulp.no.zero = fdiv float 1.000000e+00, %no.zero, !fpmath !2 |
| store volatile float %md.1ulp.no.zero, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.no.zero.sub = fdiv float 1.000000e+00, %no.zero.sub, !fpmath !2 |
| store volatile float %md.1ulp.no.zero.sub, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Reciprocal (1.0 / x) fdivs tagged with !fpmath !2 whose operands carry |
| ; nofpclass(sub), nofpclass(nsub) and nofpclass(psub) respectively. |
| ; NOTE(review): !2 is defined outside this section; the md.1ulp value names |
| ; suggest it requests 1 ULP accuracy -- confirm against the metadata node. |
| ; Per the checks below, both IEEE runs expand every division into a |
| ; frexp / llvm.amdgcn.rcp / ldexp sequence regardless of the nofpclass |
| ; attribute, while the DAZ run emits a bare llvm.amdgcn.rcp. The BADFREXP run |
| ; differs from GOODFREXP only in reading the exponent through |
| ; llvm.amdgcn.frexp.exp instead of the second result of llvm.frexp. |
| define amdgpu_kernel void @rcp_fdiv_f32_nosub(ptr addrspace(1) %out, |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP11]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_SUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_SUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_NSUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_NSUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[NO_PSUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP11]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[NO_PSUB]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = sub i32 0, [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP14]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_nosub( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_SUB]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NO_NSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_NSUB]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_NO_PSUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[NO_PSUB]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| float nofpclass(sub) %no.sub, |
| float nofpclass(nsub) %no.nsub, |
| float nofpclass(psub) %no.psub) { |
| %md.1ulp.no.sub = fdiv float 1.000000e+00, %no.sub, !fpmath !2 |
| store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.no.nsub = fdiv float 1.000000e+00, %no.nsub, !fpmath !2 |
| store volatile float %md.1ulp.no.nsub, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.no.psub = fdiv float 1.000000e+00, %no.psub, !fpmath !2 |
| store volatile float %md.1ulp.no.psub, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Reciprocal (1.0 / x) fdiv with !fpmath !2 where the operand range is given |
| ; by llvm.assume instead of a parameter attribute: fabs(x) is assumed to be |
| ; ordered and >= 0x3810000000000000, i.e. 2^-126, the smallest normal f32 |
| ; magnitude. |
| ; Per the checks below, the fabs / fcmp / assume sequence is left in place, |
| ; both IEEE runs still emit the frexp / llvm.amdgcn.rcp / ldexp expansion, |
| ; and the DAZ run emits a bare llvm.amdgcn.rcp. |
| define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub(ptr addrspace(1) %out, float %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000 |
| call void @llvm.assume(i1 %is.not.subnormal) |
| %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2 |
| store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Test the case where, in addition to the input, the result of the fdiv is |
| ; assumed not to be denormal: both fabs(x) and fabs(1.0 / x) are assumed to be |
| ; ordered and >= 0x3810000000000000 (2^-126, the smallest normal f32 |
| ; magnitude). |
| ; Per the checks below, both assume sequences are kept, the second one reads |
| ; the rewritten value, both IEEE runs still emit the frexp / |
| ; llvm.amdgcn.rcp / ldexp expansion, and the DAZ run emits a bare |
| ; llvm.amdgcn.rcp. |
| define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub(ptr addrspace(1) %out, float %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) |
| ; IEEE-GOODFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) |
| ; IEEE-BADFREXP-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_assume_nosub_assume_result_nosub( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; DAZ-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; DAZ-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; DAZ-NEXT: [[MD_1ULP_NO_SUB:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[X]]) |
| ; DAZ-NEXT: [[FABS_RESULT:%.*]] = call float @llvm.fabs.f32(float [[MD_1ULP_NO_SUB]]) |
| ; DAZ-NEXT: [[RESULT_IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_RESULT]], 0x3810000000000000 |
| ; DAZ-NEXT: call void @llvm.assume(i1 [[RESULT_IS_NOT_SUBNORMAL]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000 |
| call void @llvm.assume(i1 %is.not.subnormal) |
| %md.1ulp.no.sub = fdiv float 1.000000e+00, %x, !fpmath !2 |
| |
| %fabs.result = call float @llvm.fabs.f32(float %md.1ulp.no.sub) |
| %result.is.not.subnormal = fcmp oge float %fabs.result, 0x3810000000000000 |
| call void @llvm.assume(i1 %result.is.not.subnormal) |
| store volatile float %md.1ulp.no.sub, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Four <2 x float> reciprocal fdivs (splat 1.0 / x) with !fpmath !2, each with |
| ; a different fast-math flag set: ninf nnan, ninf, nnan and nsz. |
| ; Per the checks below, every vector fdiv is scalarized: both lanes of x are |
| ; extracted, each lane is expanded on its own, and the result is rebuilt with |
| ; insertelement starting from poison. The fdiv's flags are carried over to the |
| ; llvm.amdgcn.rcp and llvm.ldexp calls (printed as "nnan ninf" for the first |
| ; fdiv), but not to the llvm.frexp calls. The IEEE runs use the frexp / rcp / |
| ; ldexp expansion per lane; the DAZ run uses a bare llvm.amdgcn.rcp per lane. |
| define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags(ptr addrspace(1) %out, <2 x float> %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP56:%.*]] = extractvalue { float, i32 } [[TMP54]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = sub i32 0, [[TMP5]] |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP7]], i32 [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = sub i32 0, [[TMP11]] |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call nnan ninf float @llvm.ldexp.f32.i32(float [[TMP13]], i32 [[TMP12]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP16]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP16]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP25]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call ninf float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP30]], float [[TMP29]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 0, [[TMP35]] |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP34]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP36]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP40]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call nnan float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <2 x float> poison, float [[TMP38]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP45]], float [[TMP44]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP46]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP46]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = sub i32 0, [[TMP50]] |
| ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP49]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP52]], i32 [[TMP51]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP47]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = extractvalue { float, i32 } [[TMP54]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP47]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP57:%.*]] = sub i32 0, [[TMP56]] |
| ; IEEE-BADFREXP-NEXT: [[TMP58:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP55]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP59:%.*]] = call nsz float @llvm.ldexp.f32.i32(float [[TMP58]], i32 [[TMP57]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP60:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP60]], float [[TMP59]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_flags( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP4:%.*]] = call nnan ninf float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP_NINF_NNAN:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = call ninf float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP_NINF:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP9]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP13:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; DAZ-NEXT: [[TMP14:%.*]] = call nnan float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; DAZ-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP_NNAN:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP18:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP16]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = call nsz float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = insertelement <2 x float> poison, float [[TMP18]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP_NSZ:%.*]] = insertelement <2 x float> [[TMP20]], float [[TMP19]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| %md.1ulp.ninf.nnan = fdiv ninf nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 |
| store volatile <2 x float> %md.1ulp.ninf.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.ninf = fdiv ninf <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 |
| store volatile <2 x float> %md.1ulp.ninf, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.nnan = fdiv nnan <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 |
| store volatile <2 x float> %md.1ulp.nnan, ptr addrspace(1) %out, align 4 |
| |
| %md.1ulp.nsz = fdiv nsz <2 x float> <float 1.0, float 1.0>, %x, !fpmath !2 |
| store volatile <2 x float> %md.1ulp.nsz, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| define amdgpu_kernel void @fdiv_fpmath_f32_vector(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP30]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = extractvalue { float, i32 } [[TMP44]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP28]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP30]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP28]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP26]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP26]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul float [[TMP35]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = sub i32 [[TMP36]], [[TMP32]] |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP37]], i32 [[TMP38]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP41]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP44]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = fmul float [[TMP45]], [[TMP43]] |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = sub i32 [[TMP46]], [[TMP42]] |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP47]], i32 [[TMP48]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = insertelement <2 x float> poison, float [[TMP39]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP50]], float [[TMP49]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_vector( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> [[A]], [[B]] |
| ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> [[A]], [[B]], !fpmath [[META1]] |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP9]], 0 |
| ; DAZ-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP9]], 1 |
| ; DAZ-NEXT: [[TMP12:%.*]] = fmul float [[TMP10]], [[TMP8]] |
| ; DAZ-NEXT: [[TMP13:%.*]] = sub i32 [[TMP11]], [[TMP7]] |
| ; DAZ-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) |
| ; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 |
| ; DAZ-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP16]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; DAZ-NEXT: [[TMP22:%.*]] = fmul float [[TMP20]], [[TMP18]] |
| ; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] |
| ; DAZ-NEXT: [[TMP24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) |
| ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[A]], i64 0 |
| ; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[A]], i64 1 |
| ; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[B]], i64 0 |
| ; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[B]], i64 1 |
| ; DAZ-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP26]], float [[TMP28]]) |
| ; DAZ-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[TMP27]], float [[TMP29]]) |
| ; DAZ-NEXT: [[TMP32:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = insertelement <2 x float> [[TMP32]], float [[TMP31]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: ret void |
| ; |
| %no.md = fdiv <2 x float> %a, %b |
| store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 |
| %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1 |
| store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8 |
| %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2 |
| store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 8 |
| %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0 |
| store volatile <2 x float> %md.25ulp, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| ; Vector reciprocal pattern: every fdiv below divides a <2 x float> constant whose |
| ; lanes are both 1.0 by %x, varying the fast-math flags (none, afn, fast) and the |
| ; !fpmath metadata (none, !1, !0). Each result is stored with a volatile store. |
| ; The expectations use the CHECK prefix shared by all FileCheck invocations in this |
| ; file, and they show all six fdiv instructions left in place with their flags and |
| ; metadata intact (the constant is merely printed in its splat form). |
| define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[MD_HALF_ULP:%.*]] = fdiv <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META1:![0-9]+]] |
| ; CHECK-NEXT: store volatile <2 x float> [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> splat (float 1.000000e+00), [[X]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; No fast-math flags; first without metadata, then with !fpmath !1. |
| %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x |
| store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 |
| %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1 |
| store volatile <2 x float> %md.half.ulp, ptr addrspace(1) %out, align 8 |
| ; afn / fast without any !fpmath metadata. |
| %afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x |
| store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8 |
| %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x |
| store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8 |
| ; afn / fast combined with !fpmath !0 (the metadata node is defined outside this part of the file). |
| %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0 |
| store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 |
| %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0 |
| store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| ; Same shape of test as the splat-1.0 vector case, but the constant dividend is the |
| ; non-splat vector <1.0, 2.0>, so only lane 0 is a reciprocal. Five fdivs are covered: |
| ; no flags, afn, fast, and afn / fast with !fpmath !0. |
| ; The expectations use the CHECK prefix shared by all FileCheck invocations in this |
| ; file, and they show every fdiv left in place with its flags and metadata. |
| define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat(ptr addrspace(1) %out, <2 x float> %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_fpmath_vector_nonsplat( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[NO_MD:%.*]] = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[AFN_NO_MD:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]] |
| ; CHECK-NEXT: store volatile <2 x float> [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, [[X]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Variants without !fpmath metadata: no flags, afn, fast. |
| %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x |
| store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 8 |
| %afn.no.md = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x |
| store volatile <2 x float> %afn.no.md, ptr addrspace(1) %out, align 8 |
| %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x |
| store volatile <2 x float> %fast.no.md, ptr addrspace(1) %out, align 8 |
| ; afn / fast combined with !fpmath !0. |
| %afn.25ulp = fdiv afn <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0 |
| store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 |
| %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0 |
| store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| ; Dividend is only partially constant: lane 0 of %x is overwritten with 1.0 via |
| ; insertelement while lane 1 stays variable, and the result is divided by %y. |
| ; Two fdivs are covered, afn and fast, both carrying !fpmath !0. |
| ; The expectations use the CHECK prefix shared by all FileCheck invocations in this |
| ; file, and they show the insertelement and both vector fdivs left in place. |
| define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) { |
| ; CHECK-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 |
| ; CHECK-NEXT: [[AFN_25ULP:%.*]] = fdiv afn <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: [[FAST_25ULP:%.*]] = fdiv fast <2 x float> [[X_INSERT]], [[Y]], !fpmath [[META0]] |
| ; CHECK-NEXT: store volatile <2 x float> [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Build the dividend: lane 0 = 1.0, lane 1 = original %x lane 1. |
| %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0 |
| %afn.25ulp = fdiv afn <2 x float> %x.insert, %y, !fpmath !0 |
| store volatile <2 x float> %afn.25ulp, ptr addrspace(1) %out, align 8 |
| %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0 |
| store volatile <2 x float> %fast.25ulp, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| ; Partially constant dividend (lane 0 of %x replaced by 1.0) divided by %y, this time |
| ; with the arcp flag and !fpmath !0. Unlike the afn / fast variant, the expected output |
| ; here is scalarized per lane and differs between check prefixes: |
| ;  - IEEE-GOODFREXP expects, for each lane, llvm.frexp on the divisor, llvm.amdgcn.rcp on |
| ;    the mantissa, llvm.ldexp with the negated exponent, then an fmul by the dividend lane. |
| ;  - IEEE-BADFREXP expects the same sequence, except that the exponent is obtained from |
| ;    llvm.amdgcn.frexp.exp instead of the second result of llvm.frexp. |
| ;  - DAZ expects llvm.amdgcn.rcp applied directly to the divisor lane, followed by the fmul. |
| ; In every case the two scalar results are reassembled with insertelement and stored. |
| define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rcp_fdiv_f32_vector_fpmath_partial_constant_arcp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[X_INSERT:%.*]] = insertelement <2 x float> [[X]], float 1.000000e+00, i32 0 |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X_INSERT]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]] |
| ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 |
| ; DAZ-NEXT: [[ARCP_25ULP:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[ARCP_25ULP]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Build the dividend: lane 0 = 1.0, lane 1 = original %x lane 1. |
| %x.insert = insertelement <2 x float> %x, float 1.000000e+00, i32 0 |
| %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0 |
| store volatile <2 x float> %arcp.25ulp, ptr addrspace(1) %out, align 8 |
| ret void |
| } |
| |
| define amdgpu_kernel void @rsq_f32_fpmath(ptr addrspace(1) %out, float %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]]) |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fmul contract float [[X]], [[TMP2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = fmul contract float [[TMP4]], [[TMP5]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2:![0-9]+]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MD_1ULP_MULTI_USE]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MD_1ULP_MULTI_USE]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[X]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = fmul contract float [[TMP14]], [[TMP15]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = select contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul contract float [[X]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = select contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = fmul contract float [[TMP19]], [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = fcmp contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = select contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = fmul contract float [[X]], [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP23]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = select contract i1 [[TMP21]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fmul contract float [[TMP24]], [[TMP25]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_HALF_ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_HALF_ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 0, [[TMP28]] |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_MISMATCH_MD1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_MISMATCH_MD1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = fcmp olt float [[X]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = select i1 [[TMP36]], i32 32, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call float @llvm.ldexp.f32.i32(float [[X]], i32 [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP38]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = select i1 [[TMP36]], i32 -16, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 [[TMP40]]) |
| ; IEEE-BADFREXP-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_X_NO_MD]] |
| ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_MD_1ULP_MULTI_USE:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[SQRT_MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_1ULP_MULTI_USE:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MD_1ULP_MULTI_USE]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP_MULTI_USE]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_MD_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv contract float 1.000000e+00, [[SQRT_MD_HALF_ULP]], !fpmath [[META1]] |
| ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_X_AFN_NO_MD:%.*]] = call contract afn float @llvm.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[AFN_NO_MD:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_X_AFN_NO_MD]] |
| ; DAZ-NEXT: store volatile float [[AFN_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_25ULP:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[AFN_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_X_FAST_NO_MD:%.*]] = call fast float @llvm.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[FAST_NO_MD:%.*]] = fdiv fast float 1.000000e+00, [[SQRT_X_FAST_NO_MD]] |
| ; DAZ-NEXT: store volatile float [[FAST_NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_25ULP:%.*]] = call fast float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FAST_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP1:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: [[NEG_FDIV_OPENCL:%.*]] = fneg contract float [[TMP1]] |
| ; DAZ-NEXT: store volatile float [[NEG_FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_X_HALF_ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META1]] |
| ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD0:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_HALF_ULP]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_MISMATCH_MD1:%.*]] = call afn float @llvm.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD1:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_MISMATCH_MD1]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_MISMATCH_MD2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[FDIV_SQRT_MISMATCH_MD2:%.*]] = fdiv contract afn float 1.000000e+00, [[SQRT_MISMATCH_MD2]] |
| ; DAZ-NEXT: store volatile float [[FDIV_SQRT_MISMATCH_MD2]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| %sqrt.x.no.md = call contract float @llvm.sqrt.f32(float %x) |
| %no.md = fdiv contract float 1.000000e+00, %sqrt.x.no.md |
| store volatile float %no.md, ptr addrspace(1) %out, align 4 |
| |
| ; Matches the rsq instruction accuracy |
| %sqrt.md.1ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2 |
| %md.1ulp = fdiv contract float 1.000000e+00, %sqrt.md.1ulp, !fpmath !2 |
| store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.md.1ulp.multi.use = call contract float @llvm.sqrt.f32(float %x), !fpmath !2 |
| store volatile float %sqrt.md.1ulp.multi.use, ptr addrspace(1) %out, align 4 |
| %md.1ulp.multi.use = fdiv contract float 1.000000e+00, %sqrt.md.1ulp.multi.use, !fpmath !2 |
| store volatile float %md.1ulp.multi.use, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.md.25ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !0 |
| %md.25ulp = fdiv contract float 1.000000e+00, %sqrt.md.25ulp, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.md.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1 |
| %md.half.ulp = fdiv contract float 1.000000e+00, %sqrt.md.half.ulp, !fpmath !1 |
| store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.x.afn.no.md = call contract afn float @llvm.sqrt.f32(float %x) |
| %afn.no.md = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.no.md |
| store volatile float %afn.no.md, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.x.afn.25ulp = call contract afn float @llvm.sqrt.f32(float %x), !fpmath !0 |
| %afn.25ulp = fdiv contract afn float 1.000000e+00, %sqrt.x.afn.25ulp, !fpmath !0 |
| store volatile float %afn.25ulp, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.x.fast.no.md = call fast float @llvm.sqrt.f32(float %x) |
| %fast.no.md = fdiv fast float 1.000000e+00, %sqrt.x.fast.no.md |
| store volatile float %fast.no.md, ptr addrspace(1) %out, align 4 |
| |
| %sqrt.x.fast.25ulp = call fast float @llvm.sqrt.f32(float %x), !fpmath !0 |
| %fast.25ulp = fdiv fast float 1.000000e+00, %sqrt.x.fast.25ulp, !fpmath !0 |
| store volatile float %fast.25ulp, ptr addrspace(1) %out, align 4 |
| |
| |
| ; Test mismatched metadata/flags between the sqrt and fdiv |
| |
| ; Test the expected opencl default pattern |
| %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !0 |
| store volatile float %fdiv.opencl, ptr addrspace(1) %out, align 4 |
| |
| %neg.sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %neg.fdiv.opencl = fdiv contract float -1.0, %neg.sqrt.x.3ulp, !fpmath !0 |
| store volatile float %neg.fdiv.opencl, ptr addrspace(1) %out, align 4 |
| |
| ; sqrt demands higher precision than fdiv |
| %sqrt.x.half.ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !1 |
| %fdiv.sqrt.mismatch.md0 = fdiv contract float 1.0, %sqrt.x.half.ulp, !fpmath !0 |
| store volatile float %fdiv.sqrt.mismatch.md0, ptr addrspace(1) %out, align 4 |
| |
| ; sqrt demands full precision but has afn |
| %sqrt.mismatch.md1 = call afn float @llvm.sqrt.f32(float %x) |
| %fdiv.sqrt.mismatch.md1 = fdiv contract float 1.0, %sqrt.mismatch.md1, !fpmath !0 |
| store volatile float %fdiv.sqrt.mismatch.md1, ptr addrspace(1) %out, align 4 |
| |
| ; sqrt has relaxed precision fdiv has afn only |
| %sqrt.mismatch.md2 = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 |
| %fdiv.sqrt.mismatch.md2 = fdiv contract afn float 1.0, %sqrt.mismatch.md2 |
| store volatile float %fdiv.sqrt.mismatch.md2, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Checks which fast-math flags end up on the llvm.amdgcn.rsq expansion of |
| ; 1.0 / sqrt(x) when the sqrt (!fpmath !3) and the fdiv (!fpmath !0) carry |
| ; nnan/ninf/nsz in various combinations. Both instructions always have |
| ; contract. The expected output carries the union of the two flag sets. |
| define amdgpu_kernel void @rsq_f32_fpmath_flags(ptr addrspace(1) %out, float %x) { |
| ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags( |
| ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[TMP1:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP2:%.*]] = select nnan ninf contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP3:%.*]] = fmul nnan ninf contract float [[X]], [[TMP2]] |
| ; IEEE-NEXT: [[TMP4:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-NEXT: [[TMP5:%.*]] = select nnan ninf contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = fmul nnan ninf contract float [[TMP4]], [[TMP5]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP6:%.*]] = fcmp ninf contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP7:%.*]] = select ninf contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP8:%.*]] = fmul ninf contract float [[X]], [[TMP7]] |
| ; IEEE-NEXT: [[TMP9:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP8]]) |
| ; IEEE-NEXT: [[TMP10:%.*]] = select ninf contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NINF:%.*]] = fmul ninf contract float [[TMP9]], [[TMP10]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP11:%.*]] = fcmp nnan contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP12:%.*]] = select nnan contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP13:%.*]] = fmul nnan contract float [[X]], [[TMP12]] |
| ; IEEE-NEXT: [[TMP14:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-NEXT: [[TMP15:%.*]] = select nnan contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = fmul nnan contract float [[TMP14]], [[TMP15]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP16:%.*]] = fcmp nsz contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP17:%.*]] = select nsz contract i1 [[TMP16]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP18:%.*]] = fmul nsz contract float [[X]], [[TMP17]] |
| ; IEEE-NEXT: [[TMP19:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP18]]) |
| ; IEEE-NEXT: [[TMP20:%.*]] = select nsz contract i1 [[TMP16]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = fmul nsz contract float [[TMP19]], [[TMP20]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP21:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP22:%.*]] = select nnan ninf contract i1 [[TMP21]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP23:%.*]] = fmul nnan ninf contract float [[X]], [[TMP22]] |
| ; IEEE-NEXT: [[TMP24:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP23]]) |
| ; IEEE-NEXT: [[TMP25:%.*]] = select nnan ninf contract i1 [[TMP21]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = fmul nnan ninf contract float [[TMP24]], [[TMP25]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP26:%.*]] = fcmp nnan ninf contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP27:%.*]] = select nnan ninf contract i1 [[TMP26]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP28:%.*]] = fmul nnan ninf contract float [[X]], [[TMP27]] |
| ; IEEE-NEXT: [[TMP29:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[TMP28]]) |
| ; IEEE-NEXT: [[TMP30:%.*]] = select nnan ninf contract i1 [[TMP26]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NINF_MIX1:%.*]] = fmul nnan ninf contract float [[TMP29]], [[TMP30]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_fpmath_flags( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FDIV_OPENCL_NINF_NNAN:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NINF:%.*]] = call ninf contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NNAN:%.*]] = call nnan contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NSZ:%.*]] = call nsz contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NSZ]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NNAN_MIX0:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NNAN_MIX0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NINF_MIX1:%.*]] = call nnan ninf contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NINF_MIX1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Same flags on both instructions: ninf + nnan. |
| %sqrt.x.3ulp.ninf.nnan = call contract ninf nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl.ninf.nnan = fdiv contract ninf nnan float 1.0, %sqrt.x.3ulp.ninf.nnan, !fpmath !0 |
| store volatile float %fdiv.opencl.ninf.nnan, ptr addrspace(1) %out, align 4 |
| |
| ; Same flags on both instructions: ninf only. |
| %sqrt.x.3ulp.ninf = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl.ninf = fdiv contract ninf float 1.0, %sqrt.x.3ulp.ninf, !fpmath !0 |
| store volatile float %fdiv.opencl.ninf, ptr addrspace(1) %out, align 4 |
| |
| ; Same flags on both instructions: nnan only. |
| %sqrt.x.3ulp.nnan = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl.nnan = fdiv contract nnan float 1.0, %sqrt.x.3ulp.nnan, !fpmath !0 |
| store volatile float %fdiv.opencl.nnan, ptr addrspace(1) %out, align 4 |
| |
| ; Same flags on both instructions: nsz only. |
| %sqrt.x.3ulp.nsz = call contract nsz float @llvm.sqrt.f32(float %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl.nsz = fdiv contract nsz float 1.0, %sqrt.x.3ulp.nsz, !fpmath !0 |
| store volatile float %fdiv.opencl.nsz, ptr addrspace(1) %out, align 4 |
| |
| ; Mixed flags, ninf on the sqrt and nnan on the fdiv. |
| %sqrt.x.3ulp.ninf.mix0 = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !3 |
| %fdiv.opencl.nnan.mix0 = fdiv contract nnan float 1.0, %sqrt.x.3ulp.ninf.mix0, !fpmath !0 |
| store volatile float %fdiv.opencl.nnan.mix0, ptr addrspace(1) %out, align 4 |
| |
| ; Mixed flags the other way around, nnan on the sqrt and ninf on the fdiv. |
| %sqrt.x.3ulp.nnan.mix1 = call contract nnan float @llvm.sqrt.f32(float %x), !fpmath !3 |
| %fdiv.opencl.ninf.mix1 = fdiv contract ninf float 1.0, %sqrt.x.3ulp.nnan.mix1, !fpmath !0 |
| store volatile float %fdiv.opencl.ninf.mix1, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / sqrt(x) where only the fdiv has the contract flag; the sqrt call has |
| ; no fast-math flags. The expected output keeps a separate sqrt and expands |
| ; the division through llvm.amdgcn.rcp instead of forming llvm.amdgcn.rsq. |
| ; NOTE(review): the values are named "3ulp"/"opencl" but both instructions |
| ; use !fpmath !2, which @rsq_f32_fpmath pairs with its "1ulp" values - confirm |
| ; whether the names or the metadata are the intended ones. |
| define float @rsq_f32_missing_contract0(float %x) { |
| ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract0( |
| ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract0( |
| ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; DAZ-LABEL: define float @rsq_f32_missing_contract0( |
| ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]]) |
| ; DAZ-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; No contract on the sqrt. |
| %sqrt.x.3ulp = call float @llvm.sqrt.f32(float %x), !fpmath !2 |
| %fdiv.opencl = fdiv contract float 1.0, %sqrt.x.3ulp, !fpmath !2 |
| ret float %fdiv.opencl |
| } |
| |
| ; 1.0 / sqrt(x) where only the sqrt has the contract flag; the fdiv has no |
| ; fast-math flags. The expected output keeps a separate sqrt and expands the |
| ; division through a flag-less llvm.amdgcn.rcp instead of forming |
| ; llvm.amdgcn.rsq. |
| ; NOTE(review): the values are named "3ulp"/"opencl" but both instructions |
| ; use !fpmath !2, which @rsq_f32_fpmath pairs with its "1ulp" values - confirm |
| ; whether the names or the metadata are the intended ones. |
| define float @rsq_f32_missing_contract1(float %x) { |
| ; IEEE-GOODFREXP-LABEL: define float @rsq_f32_missing_contract1( |
| ; IEEE-GOODFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define float @rsq_f32_missing_contract1( |
| ; IEEE-BADFREXP-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract float @llvm.sqrt.f32(float [[X]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[SQRT_X_3ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[SQRT_X_3ULP]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; DAZ-LABEL: define float @rsq_f32_missing_contract1( |
| ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[X]]) |
| ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[SQRT_X_3ULP]]) |
| ; DAZ-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| %sqrt.x.3ulp = call contract float @llvm.sqrt.f32(float %x), !fpmath !2 |
| ; No contract on the fdiv. |
| %fdiv.opencl = fdiv float 1.0, %sqrt.x.3ulp, !fpmath !2 |
| ret float %fdiv.opencl |
| } |
| |
| ; 1.0 / sqrt(x) with contract on both instructions, ninf only on the sqrt and |
| ; nsz only on the fdiv. The expected llvm.amdgcn.rsq expansion carries all |
| ; three flags (ninf nsz contract). |
| ; NOTE(review): the values are named "3ulp"/"opencl" but both instructions |
| ; use !fpmath !2, which @rsq_f32_fpmath pairs with its "1ulp" values - confirm |
| ; whether the names or the metadata are the intended ones. |
| define float @rsq_f32_flag_merge(float %x) { |
| ; IEEE-LABEL: define float @rsq_f32_flag_merge( |
| ; IEEE-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[TMP1:%.*]] = fcmp ninf nsz contract olt float [[X]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP2:%.*]] = select ninf nsz contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP3:%.*]] = fmul ninf nsz contract float [[X]], [[TMP2]] |
| ; IEEE-NEXT: [[TMP4:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-NEXT: [[TMP5:%.*]] = select ninf nsz contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL:%.*]] = fmul ninf nsz contract float [[TMP4]], [[TMP5]] |
| ; IEEE-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; DAZ-LABEL: define float @rsq_f32_flag_merge( |
| ; DAZ-SAME: float [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = call ninf nsz contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; DAZ-NEXT: ret float [[FDIV_OPENCL]] |
| ; |
| ; ninf comes from the sqrt, nsz from the fdiv. |
| %sqrt.x.3ulp = call contract ninf float @llvm.sqrt.f32(float %x), !fpmath !2 |
| %fdiv.opencl = fdiv contract nsz float 1.0, %sqrt.x.3ulp, !fpmath !2 |
| ret float %fdiv.opencl |
| } |
| |
| ; 1.0 / sqrt(x) with sqrt !fpmath !3 and fdiv !fpmath !0, where the sqrt input |
| ; is a kernel argument marked nofpclass for nan, for inf, and for both. The |
| ; expected llvm.amdgcn.rsq expansion is the same for all three arguments. |
| define amdgpu_kernel void @rsq_f32_knownfinite(ptr addrspace(1) %out, float nofpclass(nan) %no.nan, |
| ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite( |
| ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_NAN]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_NAN]], [[TMP2]] |
| ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = fmul contract float [[TMP4]], [[TMP5]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_INF]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_INF]], [[TMP7]] |
| ; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]]) |
| ; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = fmul contract float [[TMP9]], [[TMP10]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[NO_INF_NAN]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP13:%.*]] = fmul contract float [[NO_INF_NAN]], [[TMP12]] |
| ; IEEE-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = fmul contract float [[TMP14]], [[TMP15]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_knownfinite( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(nan) [[NO_NAN:%.*]], float nofpclass(inf) [[NO_INF:%.*]], float nofpclass(nan inf) [[NO_INF_NAN:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NAN]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_INF:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_INF_NAN:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_INF_NAN]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_INF_NAN]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| float nofpclass(inf) %no.inf, |
| float nofpclass(inf nan) %no.inf.nan) { |
| ; Input known not to be nan. |
| %sqrt.x.3ulp.no.nan = call contract float @llvm.sqrt.f32(float %no.nan), !fpmath !3 |
| %fdiv.opencl.no.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.nan, !fpmath !0 |
| store volatile float %fdiv.opencl.no.nan, ptr addrspace(1) %out, align 4 |
| |
| ; Input known not to be inf. |
| %sqrt.x.3ulp.no.inf = call contract float @llvm.sqrt.f32(float %no.inf), !fpmath !3 |
| %fdiv.opencl.no.inf = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf, !fpmath !0 |
| store volatile float %fdiv.opencl.no.inf, ptr addrspace(1) %out, align 4 |
| |
| ; Input known to be neither inf nor nan. |
| %sqrt.x.3ulp.no.inf.nan = call contract float @llvm.sqrt.f32(float %no.inf.nan), !fpmath !3 |
| %fdiv.opencl.no.inf.nan = fdiv contract float 1.0, %sqrt.x.3ulp.no.inf.nan, !fpmath !0 |
| store volatile float %fdiv.opencl.no.inf.nan, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / sqrt(x) where the argument attributes rule out zero (%no.zero) or rule |
| ; out both zero and subnormals (%no.zero.sub). |
| ; |
| ; Per the checks below, in the IEEE-prefixed runs only %no.zero.sub becomes a |
| ; bare llvm.amdgcn.rsq.f32 call; %no.zero still gets the scaling sequence. |
| ; In that sequence 0x3810000000000000 is 2^-126 (the smallest normal f32), |
| ; 0x4170000000000000 is 2^24 and 4096 is 2^12, so an input that compares less |
| ; than 2^-126 is multiplied by 2^24 before rsq and the result by 2^12 after. |
| ; In the DAZ-prefixed run both inputs become a bare rsq call. |
| define amdgpu_kernel void @rsq_f32_known_nozero(ptr addrspace(1) %out, float nofpclass(zero) %no.zero, float nofpclass(zero sub) %no.zero.sub) { |
| ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero( |
| ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_ZERO]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_ZERO]], [[TMP2]] |
| ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = fmul contract float [[TMP4]], [[TMP5]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]]) |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nozero( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(zero) [[NO_ZERO:%.*]], float nofpclass(zero sub) [[NO_ZERO_SUB:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_ZERO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_ZERO_SUB]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_ZERO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Only zero is excluded here, so a subnormal input is still possible. |
| %sqrt.x.3ulp.no.zero = call contract float @llvm.sqrt.f32(float %no.zero), !fpmath !3 |
| %fdiv.opencl.no.zero = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero, !fpmath !0 |
| store volatile float %fdiv.opencl.no.zero, ptr addrspace(1) %out, align 4 |
| |
| ; Zero and subnormals are both excluded. |
| %sqrt.x.3ulp.no.zero.sub = call contract float @llvm.sqrt.f32(float %no.zero.sub), !fpmath !3 |
| %fdiv.opencl.no.zero.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.zero.sub, !fpmath !0 |
| store volatile float %fdiv.opencl.no.zero.sub, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / sqrt(x) where the argument attributes rule out subnormals of both signs |
| ; (%no.sub), only positive subnormals (%no.psub) or only negative subnormals |
| ; (%no.nsub). |
| ; |
| ; Per the checks below, in the IEEE-prefixed runs only %no.sub becomes a bare |
| ; llvm.amdgcn.rsq.f32 call; excluding a single sign of subnormal still produces |
| ; the compare/scale/rsq/rescale sequence. In the DAZ-prefixed run all three |
| ; inputs become a bare rsq call. |
| define amdgpu_kernel void @rsq_f32_known_nosub(ptr addrspace(1) %out, float nofpclass(sub) %no.sub, float nofpclass(psub) %no.psub, float nofpclass(nsub) %no.nsub) { |
| ; IEEE-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub( |
| ; IEEE-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]]) |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP1:%.*]] = fcmp contract olt float [[NO_PSUB]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP2:%.*]] = select contract i1 [[TMP1]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP3:%.*]] = fmul contract float [[NO_PSUB]], [[TMP2]] |
| ; IEEE-NEXT: [[TMP4:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; IEEE-NEXT: [[TMP5:%.*]] = select contract i1 [[TMP1]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = fmul contract float [[TMP4]], [[TMP5]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: [[TMP6:%.*]] = fcmp contract olt float [[NO_NSUB]], 0x3810000000000000 |
| ; IEEE-NEXT: [[TMP7:%.*]] = select contract i1 [[TMP6]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-NEXT: [[TMP8:%.*]] = fmul contract float [[NO_NSUB]], [[TMP7]] |
| ; IEEE-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP8]]) |
| ; IEEE-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP6]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = fmul contract float [[TMP9]], [[TMP10]] |
| ; IEEE-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_known_nosub( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[NO_SUB:%.*]], float nofpclass(psub) [[NO_PSUB:%.*]], float nofpclass(nsub) [[NO_NSUB:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_SUB]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_PSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_PSUB]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_PSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FDIV_OPENCL_NO_NSUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[NO_NSUB]]) |
| ; DAZ-NEXT: store volatile float [[FDIV_OPENCL_NO_NSUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Subnormals of either sign are excluded. |
| %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %no.sub), !fpmath !3 |
| %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0 |
| store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4 |
| |
| ; Only positive subnormals are excluded. |
| %sqrt.x.3ulp.no.psub = call contract float @llvm.sqrt.f32(float %no.psub), !fpmath !3 |
| %fdiv.opencl.no.psub = fdiv contract float 1.0, %sqrt.x.3ulp.no.psub, !fpmath !0 |
| store volatile float %fdiv.opencl.no.psub, ptr addrspace(1) %out, align 4 |
| |
| ; Only negative subnormals are excluded. |
| %sqrt.x.3ulp.no.nsub = call contract float @llvm.sqrt.f32(float %no.nsub), !fpmath !3 |
| %fdiv.opencl.no.nsub = fdiv contract float 1.0, %sqrt.x.3ulp.no.nsub, !fpmath !0 |
| store volatile float %fdiv.opencl.no.nsub, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; 1.0 / sqrt(x) where the range of %x is constrained by an llvm.assume rather |
| ; than by an argument attribute: fabs(x) is assumed to be ordered and at least |
| ; 0x3810000000000000 (2^-126, the smallest normal f32). |
| ; |
| ; The checks use the common prefix, so every RUN configuration is expected to |
| ; keep the fabs/fcmp/assume and replace the sqrt + fdiv pair with a single |
| ; llvm.amdgcn.rsq.f32 call with no scaling around it. |
| define amdgpu_kernel void @rsq_f32_assume_nosub(ptr addrspace(1) %out, float %x) { |
| ; CHECK-LABEL: define amdgpu_kernel void @rsq_f32_assume_nosub( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X]]) |
| ; CHECK-NEXT: [[IS_NOT_SUBNORMAL:%.*]] = fcmp oge float [[FABS_X]], 0x3810000000000000 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[IS_NOT_SUBNORMAL]]) |
| ; CHECK-NEXT: [[FDIV_OPENCL_NO_SUB:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[X]]) |
| ; CHECK-NEXT: store volatile float [[FDIV_OPENCL_NO_SUB]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Establish |x| >= 2^-126 before the sqrt/fdiv pair. |
| %fabs.x = call float @llvm.fabs.f32(float %x) |
| %is.not.subnormal = fcmp oge float %fabs.x, 0x3810000000000000 |
| call void @llvm.assume(i1 %is.not.subnormal) |
| %sqrt.x.3ulp.no.sub = call contract float @llvm.sqrt.f32(float %x), !fpmath !3 |
| %fdiv.opencl.no.sub = fdiv contract float 1.0, %sqrt.x.3ulp.no.sub, !fpmath !0 |
| store volatile float %fdiv.opencl.no.sub, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; <2 x float> version of the 1.0 / sqrt(x) tests. Four results are stored in turn. |
| ;   1. No !fpmath on either instruction; sqrt and fdiv are left untouched under |
| ;      every prefix. |
| ;   2. !fpmath !2 on both; the fdiv is split per lane into llvm.amdgcn.rsq.f32 |
| ;      calls (with the compare/scale/rescale wrapper in the IEEE-prefixed runs). |
| ;   3. !fpmath !2 on both, with poison as the lane 1 numerator; lane 1 is not |
| ;      turned into rsq and goes through the frexp/rcp/ldexp division expansion. |
| ;   4. !fpmath !3 on the sqrt and !fpmath !0 on the fdiv; split per lane into |
| ;      rsq calls like case 2. |
| ; |
| ; The two IEEE variants differ in how case 3 obtains the exponent: extractvalue |
| ; index 1 of the llvm.frexp result (GOODFREXP) versus a separate |
| ; llvm.amdgcn.frexp.exp.i32.f32 call (BADFREXP). |
| define amdgpu_kernel void @rsq_f32_vector_fpmath(ptr addrspace(1) %out, <2 x float> %x) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = fcmp contract olt float [[TMP3]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = select contract i1 [[TMP5]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = fmul contract float [[TMP3]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = select contract i1 [[TMP5]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = fmul contract float [[TMP8]], [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fcmp contract olt float [[TMP4]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = select contract i1 [[TMP11]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = fmul contract float [[TMP4]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = select contract i1 [[TMP11]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = fmul contract float [[TMP14]], [[TMP15]] |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = fcmp contract olt float [[TMP20]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = select contract i1 [[TMP22]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul contract float [[TMP20]], [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP24]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = select contract i1 [[TMP22]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul contract float [[TMP25]], [[TMP26]] |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = fcmp contract olt float [[TMP41]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = select contract i1 [[TMP43]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul contract float [[TMP41]], [[TMP44]] |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP45]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = select contract i1 [[TMP43]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = fmul contract float [[TMP46]], [[TMP47]] |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = fcmp contract olt float [[TMP42]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = select contract i1 [[TMP49]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = fmul contract float [[TMP42]], [[TMP50]] |
| ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP51]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = select contract i1 [[TMP49]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP54:%.*]] = fmul contract float [[TMP52]], [[TMP53]] |
| ; IEEE-BADFREXP-NEXT: [[TMP55:%.*]] = insertelement <2 x float> poison, float [[TMP48]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP55]], float [[TMP54]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @rsq_f32_vector_fpmath( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[SQRT_X_NO_MD:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]) |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv contract <2 x float> splat (float 1.000000e+00), [[SQRT_X_NO_MD]] |
| ; DAZ-NEXT: store volatile <2 x float> [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_MD_1ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META2:![0-9]+]] |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP8]]) |
| ; DAZ-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP9]]) |
| ; DAZ-NEXT: [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0 |
| ; DAZ-NEXT: [[SQRT_MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1 |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 0 |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <2 x float> [[SQRT_MD_1ULP_UNDEF]], i64 1 |
| ; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]]) |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 |
| ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 |
| ; DAZ-NEXT: [[TMP19:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 |
| ; DAZ-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 |
| ; DAZ-NEXT: [[TMP23:%.*]] = fmul contract float [[TMP21]], [[TMP19]] |
| ; DAZ-NEXT: [[TMP24:%.*]] = sub i32 [[TMP22]], [[TMP18]] |
| ; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP23]], i32 [[TMP24]]) |
| ; DAZ-NEXT: [[TMP26:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0 |
| ; DAZ-NEXT: [[MD_1ULP_UNDEF:%.*]] = insertelement <2 x float> [[TMP26]], float [[TMP25]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[MD_1ULP_UNDEF]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[SQRT_X_3ULP:%.*]] = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> [[X]]), !fpmath [[META3:![0-9]+]] |
| ; DAZ-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 0 |
| ; DAZ-NEXT: [[TMP28:%.*]] = extractelement <2 x float> [[SQRT_X_3ULP]], i64 1 |
| ; DAZ-NEXT: [[TMP29:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP29]]) |
| ; DAZ-NEXT: [[TMP32:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP30]]) |
| ; DAZ-NEXT: [[TMP33:%.*]] = insertelement <2 x float> poison, float [[TMP31]], i64 0 |
| ; DAZ-NEXT: [[FDIV_OPENCL:%.*]] = insertelement <2 x float> [[TMP33]], float [[TMP32]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[FDIV_OPENCL]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Case 1. No accuracy metadata on either instruction. |
| %sqrt.x.no.md = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x) |
| %no.md = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.no.md |
| store volatile <2 x float> %no.md, ptr addrspace(1) %out, align 4 |
| |
| ; Case 2. Matches the rsq instruction accuracy. |
| %sqrt.md.1ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 |
| %md.1ulp = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.md.1ulp, !fpmath !2 |
| store volatile <2 x float> %md.1ulp, ptr addrspace(1) %out, align 4 |
| |
| ; Case 3. Matches the rsq instruction accuracy, but the numerator in lane 1 is |
| ; poison rather than 1.0. |
| %sqrt.md.1ulp.undef = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !2 |
| %md.1ulp.undef = fdiv contract <2 x float> <float 1.0, float poison>, %sqrt.md.1ulp.undef, !fpmath !2 |
| store volatile <2 x float> %md.1ulp.undef, ptr addrspace(1) %out, align 4 |
| |
| ; Test mismatched metadata/flags between the sqrt and fdiv |
| ; NOTE(review): no dedicated mismatched-flag case follows in this function; the |
| ; only mixed-metadata pair is the OpenCL pattern below (!3 on sqrt, !0 on fdiv). |
| |
| ; Case 4. Test the expected opencl default pattern |
| %sqrt.x.3ulp = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %x), !fpmath !3 ; OpenCL default requires 3 for sqrt and 2.5 for fdiv |
| %fdiv.opencl = fdiv contract <2 x float> <float 1.0, float 1.0>, %sqrt.x.3ulp, !fpmath !0 |
| store volatile <2 x float> %fdiv.opencl, ptr addrspace(1) %out, align 4 |
| |
| ret void |
| } |
| |
| ; Two arcp fdivs that share the denominator %denom and carry no !fpmath |
| ; metadata. The checks use the common prefix, so every RUN configuration is |
| ; expected to leave both fdivs unchanged. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) { |
| ; CHECK-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd( |
| ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] |
| ; CHECK-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] |
| ; CHECK-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; CHECK-NEXT: ret void |
| ; |
| ; Both divisions are computed before either result is stored. |
| %arcp0 = fdiv arcp float %x, %denom |
| %arcp1 = fdiv arcp float %y, %denom |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Two arcp fdivs that share the denominator %denom, each tagged !fpmath !0. |
| ; |
| ; Per the checks below, each fdiv is rewritten independently as the numerator |
| ; times a reciprocal of %denom; the reciprocal is computed once per fdiv rather |
| ; than shared between them. In the IEEE-prefixed runs the reciprocal is built |
| ; from frexp + llvm.amdgcn.rcp.f32 + ldexp with a negated exponent, and the two |
| ; variants differ in whether the exponent comes from extractvalue index 1 of |
| ; the llvm.frexp result (GOODFREXP) or from llvm.amdgcn.frexp.exp.i32.f32 |
| ; (BADFREXP). In the DAZ-prefixed run it is a direct llvm.amdgcn.rcp.f32 call. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Both divisions are computed before either result is stored. |
| %arcp0 = fdiv arcp float %x, %denom, !fpmath !0 |
| %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Three arcp fdivs that share one denominator, each tagged !fpmath !0. |
| ; Judging by the function name, !0 is presumably the 2.5 ulp accuracy node; |
| ; its definition is outside this view, so confirm before relying on it. |
| ; The checks expect every division to be expanded on its own: the reciprocal |
| ; sequence for DENOM is emitted three times rather than computed once and reused. |
| ; The two IEEE check prefixes differ only in where the exponent comes from: |
| ; element 1 of the llvm.frexp result (GOODFREXP) versus a separate |
| ; llvm.amdgcn.frexp.exp call (BADFREXP). The DAZ checks apply llvm.amdgcn.rcp |
| ; to the denominator directly, with no frexp/ldexp scaling. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = sub i32 0, [[TMP15]] |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP16]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP18]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_x3( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] |
| ; DAZ-NEXT: [[TMP3:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp float [[Z]], [[TMP3]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: three divisions by the same %denom, then one volatile store per |
| ; result so that none of them can be dropped as unused. |
| %arcp0 = fdiv arcp float %x, %denom, !fpmath !0 |
| %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 |
| %arcp2 = fdiv arcp float %z, %denom, !fpmath !0 |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| store volatile float %arcp2, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Two arcp fdivs by the same denominator where only the first carries |
| ; !fpmath !0 (presumably the 2.5 ulp node, going by the name; the metadata |
| ; definition is outside this view). |
| ; Under all three check prefixes the tagged division is rewritten into a |
| ; reciprocal-and-multiply sequence, while the untagged one is expected to |
| ; stay a plain `fdiv arcp`. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd(ptr addrspace(1) %out, float %x, float %y, float %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_25ulp_nomd( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fdiv arcp float [[Y]], [[DENOM]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: %arcp0 has the accuracy metadata, %arcp1 deliberately has none. |
| %arcp0 = fdiv arcp float %x, %denom, !fpmath !0 |
| %arcp1 = fdiv arcp float %y, %denom |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Mirror image of the tagged-then-untagged case: here the first arcp fdiv |
| ; has no !fpmath and the second carries !fpmath !0 (presumably 2.5 ulp, going |
| ; by the name; the metadata definition is outside this view). |
| ; The checks expect the untagged first division to remain a plain |
| ; `fdiv arcp` and only the tagged second one to be expanded, so the order of |
| ; the two divisions does not change which one is rewritten. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_nomd_25ulp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fdiv arcp float [[X]], [[DENOM]] |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP1]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: %arcp0 deliberately has no accuracy metadata, %arcp1 has it. |
| %arcp0 = fdiv arcp float %x, %denom |
| %arcp1 = fdiv arcp float %y, %denom, !fpmath !0 |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Two arcp fdivs by the same denominator, both tagged !fpmath !2. |
| ; Going by the function name, !2 is presumably the 1 ulp accuracy node; its |
| ; definition is outside this view, so confirm before relying on it. |
| ; Both divisions are expected to be expanded, each with its own reciprocal of |
| ; DENOM: via frexp, llvm.amdgcn.rcp and ldexp under the two IEEE prefixes, |
| ; and via llvm.amdgcn.rcp applied directly under the DAZ prefix. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp(ptr addrspace(1) %out, float %x, float %y, float %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP6]] |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[TMP2:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp float [[Y]], [[TMP2]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: both divisions use the same %denom and the same !fpmath node. |
| %arcp0 = fdiv arcp float %x, %denom, !fpmath !2 |
| %arcp1 = fdiv arcp float %y, %denom, !fpmath !2 |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; <2 x float> variant of the shared-denominator case: two arcp vector fdivs |
| ; by the same %denom, both tagged !fpmath !2 (presumably 1 ulp, going by the |
| ; name; the metadata definition is outside this view). |
| ; The checks expect each vector division to be scalarized: both lanes of the |
| ; numerator and of DENOM are pulled out with extractelement, each lane gets |
| ; its own reciprocal-and-multiply sequence, and the result is rebuilt with |
| ; insertelement starting from poison. The DENOM lanes are extracted again for |
| ; the second division; nothing from the first expansion is reused. |
| define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP24]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp float [[TMP1]], [[TMP10]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fmul arcp float [[TMP2]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP19]], float [[TMP18]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP24]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 0, [[TMP26]] |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP25]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul arcp float [[TMP20]], [[TMP29]] |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP23]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP23]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = sub i32 0, [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP34]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = fmul arcp float [[TMP21]], [[TMP36]] |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <2 x float> poison, float [[TMP30]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP38]], float [[TMP37]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_denom_1ulp_vector( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = fmul arcp float [[TMP1]], [[TMP5]] |
| ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul arcp float [[TMP2]], [[TMP7]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0 |
| ; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i64 1 |
| ; DAZ-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP14:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; DAZ-NEXT: [[TMP15:%.*]] = fmul arcp float [[TMP10]], [[TMP14]] |
| ; DAZ-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; DAZ-NEXT: [[TMP17:%.*]] = fmul arcp float [[TMP11]], [[TMP16]] |
| ; DAZ-NEXT: [[TMP18:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i64 0 |
| ; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP18]], float [[TMP17]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: two whole-vector divisions; the stores carry no explicit |
| ; alignment here, while the expected output shows them with align 8. |
| %arcp0 = fdiv arcp <2 x float> %x, %denom, !fpmath !2 |
| %arcp1 = fdiv arcp <2 x float> %y, %denom, !fpmath !2 |
| store volatile <2 x float> %arcp0, ptr addrspace(1) %out |
| store volatile <2 x float> %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Shared denominator that is itself the result of llvm.sqrt.f32 (tagged |
| ; !fpmath !3), used by two `contract arcp` fdivs tagged !fpmath !0. What |
| ; accuracy !3 and !0 stand for is not visible in this chunk; !0 is presumably |
| ; 2.5 ulp going by the function name, so confirm against the metadata. |
| ; Expected output under the two IEEE prefixes: the sqrt becomes a scaled |
| ; llvm.amdgcn.sqrt. Inputs below 0x3810000000000000 (2^-126) are scaled up |
| ; with ldexp by 32 before the sqrt and the result is scaled by -16 afterwards; |
| ; otherwise both scale amounts are 0. Under the DAZ prefix the sqrt maps |
| ; straight to llvm.amdgcn.sqrt with no scaling. |
| ; In every configuration the sqrt is emitted once, but each fdiv still gets |
| ; its own reciprocal sequence, and the `arcp contract` flags are carried onto |
| ; the rcp, ldexp and fmul instructions. |
| define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp(ptr addrspace(1) %out, float %x, float %y, float %sqr.denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]]) |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Input IR: the sqrt call has only `contract`; the two divisions have both |
| ; `contract` and `arcp`. The expected output above keeps that split: the |
| ; sqrt-derived instructions carry no fast-math flags. |
| %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3 |
| %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0 |
| %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0 |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Two <2 x float> 'contract arcp' fdivs (%x / %denom and %y / %denom) that share |
| ; a single llvm.sqrt result as their denominator. |
| ; |
| ; What the assertions below pin down: |
| ;  * The vector sqrt and both vector fdivs are scalarized per element and the |
| ;    results are rebuilt with insertelement. |
| ;  * Every fdiv element gets its own reciprocal sequence; nothing is shared |
| ;    between the two divisions even though the denominator is the same value. |
| ;  * IEEE prefixes (ieee / dynamic f32 denormal mode): each sqrt element is |
| ;    wrapped in a scaling sequence (fcmp olt against 0x3810000000000000, ldexp by |
| ;    32 before llvm.amdgcn.sqrt and by -16 after), and each division becomes |
| ;    frexp + llvm.amdgcn.rcp + ldexp + fmul. |
| ;  * The GOODFREXP (hawaii) run reads the exponent from field 1 of the llvm.frexp |
| ;    result, while the BADFREXP (tahiti) run calls llvm.amdgcn.frexp.exp instead. |
| ;  * The DAZ run (preserve-sign): a bare llvm.amdgcn.sqrt per element and |
| ;    llvm.amdgcn.rcp followed by fmul per division element, with no scaling. |
| ; |
| ; The !fpmath nodes !0 and !3 are defined outside this part of the file; the test |
| ; name suggests !0 requests 2.5 ulp - confirm against the metadata definitions. |
| define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp(ptr addrspace(1) %out, <2 x float> %x, <2 x float> %y, <2 x float> %sqr.denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = extractvalue { float, i32 } [[TMP46]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = fcmp olt float [[TMP1]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 32, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP1]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP5]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], i32 -16, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP6]], i32 [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp olt float [[TMP2]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 32, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP2]], i32 [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select i1 [[TMP9]], i32 -16, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP12]], i32 [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP8]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = sub i32 0, [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP24]], i32 [[TMP23]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul arcp contract float [[TMP16]], [[TMP25]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = sub i32 0, [[TMP29]] |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP28]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP31]], i32 [[TMP30]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = fmul arcp contract float [[TMP17]], [[TMP32]] |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP34]], float [[TMP33]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = sub i32 0, [[TMP41]] |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP40]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP43]], i32 [[TMP42]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = fmul arcp contract float [[TMP35]], [[TMP44]] |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP38]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = extractvalue { float, i32 } [[TMP46]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP38]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = sub i32 0, [[TMP48]] |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP47]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP49]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = fmul arcp contract float [[TMP36]], [[TMP51]] |
| ; IEEE-BADFREXP-NEXT: [[TMP53:%.*]] = insertelement <2 x float> poison, float [[TMP45]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP53]], float [[TMP52]], i64 1 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_vector_25ulp( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[SQR_DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i64 1 |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[X]], i64 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[X]], i64 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP6]], [[TMP10]] |
| ; DAZ-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP9]]) |
| ; DAZ-NEXT: [[TMP13:%.*]] = fmul arcp contract float [[TMP7]], [[TMP12]] |
| ; DAZ-NEXT: [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i64 0 |
| ; DAZ-NEXT: [[ARCP0:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1 |
| ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <2 x float> [[Y]], i64 0 |
| ; DAZ-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[Y]], i64 1 |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP15]], [[TMP19]] |
| ; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP18]]) |
| ; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float [[TMP16]], [[TMP21]] |
| ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <2 x float> poison, float [[TMP20]], i64 0 |
| ; DAZ-NEXT: [[ARCP1:%.*]] = insertelement <2 x float> [[TMP23]], float [[TMP22]], i64 1 |
| ; DAZ-NEXT: store volatile <2 x float> [[ARCP0]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: store volatile <2 x float> [[ARCP1]], ptr addrspace(1) [[OUT]], align 8 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Shared denominator: one sqrt feeds both divisions below. |
| %denom = call contract <2 x float> @llvm.sqrt.v2f32(<2 x float> %sqr.denom), !fpmath !3 |
| %arcp0 = fdiv contract arcp <2 x float> %x, %denom, !fpmath !0 |
| %arcp1 = fdiv contract arcp <2 x float> %y, %denom, !fpmath !0 |
| ; Volatile stores keep both quotients live in the output. |
| store volatile <2 x float> %arcp0, ptr addrspace(1) %out |
| store volatile <2 x float> %arcp1, ptr addrspace(1) %out |
| ret void |
| } |
| |
| ; Scalar case with three 'contract arcp' fdivs (%x, %y and %z) that all divide by |
| ; the same llvm.sqrt result. |
| ; |
| ; What the assertions below pin down: |
| ;  * Each of the three divisions gets its own reciprocal expansion of the shared |
| ;    denominator; no reciprocal is reused across the divisions. |
| ;  * IEEE prefixes (ieee / dynamic f32 denormal mode): the sqrt is wrapped in a |
| ;    scaling sequence (fcmp olt against 0x3810000000000000, ldexp by 32 before |
| ;    llvm.amdgcn.sqrt and by -16 after), and each division becomes |
| ;    frexp + llvm.amdgcn.rcp + ldexp + fmul. |
| ;  * The GOODFREXP (hawaii) run reads the exponent from field 1 of the llvm.frexp |
| ;    result, while the BADFREXP (tahiti) run calls llvm.amdgcn.frexp.exp instead. |
| ;  * The DAZ run (preserve-sign): a bare llvm.amdgcn.sqrt, then llvm.amdgcn.rcp |
| ;    followed by fmul for each division, with no scaling. |
| ; |
| ; The !fpmath nodes !0 and !3 are defined outside this part of the file; the test |
| ; name suggests !0 requests 2.5 ulp - confirm against the metadata definitions. |
| define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3(ptr addrspace(1) %out, float %x, float %y, float %z, float %sqr.denom) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP6]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = fcmp olt float [[SQR_DENOM]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i32 32, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.ldexp.f32.i32(float [[SQR_DENOM]], i32 [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], i32 -16, i32 0 |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP4]], i32 [[TMP5]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP6]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 0, [[TMP8]] |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP7]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP11]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP17]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[DENOM]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @multiple_arcp_fdiv_sqrt_denom_25ulp_x3( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[X:%.*]], float [[Y:%.*]], float [[Z:%.*]], float [[SQR_DENOM:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[DENOM:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[SQR_DENOM]]) |
| ; DAZ-NEXT: [[TMP1:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP0:%.*]] = fmul arcp contract float [[X]], [[TMP1]] |
| ; DAZ-NEXT: [[TMP2:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP1:%.*]] = fmul arcp contract float [[Y]], [[TMP2]] |
| ; DAZ-NEXT: [[TMP3:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[DENOM]]) |
| ; DAZ-NEXT: [[ARCP2:%.*]] = fmul arcp contract float [[Z]], [[TMP3]] |
| ; DAZ-NEXT: store volatile float [[ARCP0]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP1]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: store volatile float [[ARCP2]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; Shared denominator: one sqrt feeds all three divisions below. |
| %denom = call contract float @llvm.sqrt.f32(float %sqr.denom), !fpmath !3 |
| %arcp0 = fdiv contract arcp float %x, %denom, !fpmath !0 |
| %arcp1 = fdiv contract arcp float %y, %denom, !fpmath !0 |
| %arcp2 = fdiv contract arcp float %z, %denom, !fpmath !0 |
| ; Volatile stores keep all three quotients live in the output. |
| store volatile float %arcp0, ptr addrspace(1) %out |
| store volatile float %arcp1, ptr addrspace(1) %out |
| store volatile float %arcp2, ptr addrspace(1) %out |
| ret void |
| } |
| |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator(<4 x float> %arg) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( |
| ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( |
| ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract float [[TMP5]], [[TMP10]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract float [[TMP12]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract float [[TMP6]], [[TMP16]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract float [[TMP18]], [[TMP19]] |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 |
| ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; DAZ-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; DAZ-NEXT: [[TMP17:%.*]] = fneg contract float [[TMP13]] |
| ; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP14]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; DAZ-NEXT: [[TMP22:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; DAZ-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; DAZ-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; DAZ-NEXT: [[TMP26:%.*]] = fmul contract float [[TMP24]], [[TMP22]] |
| ; DAZ-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP15]]) |
| ; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 |
| ; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 |
| ; DAZ-NEXT: [[TMP32:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) |
| ; DAZ-NEXT: [[TMP33:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP33]], 0 |
| ; DAZ-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP33]], 1 |
| ; DAZ-NEXT: [[TMP36:%.*]] = fmul contract float [[TMP34]], [[TMP32]] |
| ; DAZ-NEXT: [[TMP37:%.*]] = sub i32 [[TMP35]], [[TMP31]] |
| ; DAZ-NEXT: [[TMP38:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP36]], i32 [[TMP37]]) |
| ; DAZ-NEXT: [[TMP39:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0 |
| ; DAZ-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP18]], i64 1 |
| ; DAZ-NEXT: [[TMP41:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP28]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP38]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 |
| %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Vector 1/sqrt-style pattern: the sqrt call is `contract afn` with no !fpmath, and |
| ; the fdiv is `contract` with !fpmath !2 (the definition of !2 is outside this |
| ; chunk) and numerator <1.0, -1.0, 4.0, poison>. |
| ; Expected output, as pinned by the checks below, where the vector sqrt call is |
| ; kept in every run: |
| ;  - IEEE runs, lanes 0 and 1 (numerators 1.0 / -1.0) are llvm.amdgcn.rsq.f32 on |
| ;    the %arg lane. The input is multiplied by a selected scale when it compares |
| ;    olt 0x3810000000000000, and the result is multiplied by +/-4096 or +/-1. |
| ;  - IEEE runs, lanes 2 and 3 (numerators 4.0 / poison) use frexp on both operands, |
| ;    llvm.amdgcn.rcp.f32 on the denominator mantissa, and llvm.ldexp. The GOODFREXP |
| ;    and BADFREXP variants differ only in how the exponent is read (extractvalue of |
| ;    llvm.frexp vs. a separate llvm.amdgcn.frexp.exp.i32.f32 call). |
| ;  - DAZ run, lanes 0 and 1 are unscaled llvm.amdgcn.rsq.f32 (plus fneg for the -1.0 |
| ;    lane); lanes 2 and 3 use the same frexp/rcp/ldexp sequence. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt(<4 x float> %arg) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( |
| ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP31]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( |
| ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp contract afn olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select contract afn i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul contract afn float [[TMP5]], [[TMP10]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select contract afn i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul contract afn float [[TMP12]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp contract afn olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select contract afn i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul contract afn float [[TMP6]], [[TMP16]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select contract afn i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul contract afn float [[TMP18]], [[TMP19]] |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = fmul contract float [[TMP26]], [[TMP24]] |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = sub i32 [[TMP27]], [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP28]], i32 [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = extractvalue { float, i32 } [[TMP31]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP32]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = fmul contract float [[TMP36]], [[TMP34]] |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = sub i32 [[TMP37]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP39]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = insertelement <4 x float> [[TMP41]], float [[TMP20]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[TMP42]], float [[TMP30]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP43]], float [[TMP40]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_sqrt( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[DENOM:%.*]] = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP9:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP5]]) |
| ; DAZ-NEXT: [[TMP10:%.*]] = call contract afn float @llvm.amdgcn.rsq.f32(float [[TMP6]]) |
| ; DAZ-NEXT: [[TMP11:%.*]] = fneg contract afn float [[TMP10]] |
| ; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; DAZ-NEXT: [[TMP15:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; DAZ-NEXT: [[TMP16:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP16]], 0 |
| ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP16]], 1 |
| ; DAZ-NEXT: [[TMP19:%.*]] = fmul contract float [[TMP17]], [[TMP15]] |
| ; DAZ-NEXT: [[TMP20:%.*]] = sub i32 [[TMP18]], [[TMP14]] |
| ; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP19]], i32 [[TMP20]]) |
| ; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 |
| ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 |
| ; DAZ-NEXT: [[TMP25:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP23]]) |
| ; DAZ-NEXT: [[TMP26:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP26]], 0 |
| ; DAZ-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP26]], 1 |
| ; DAZ-NEXT: [[TMP29:%.*]] = fmul contract float [[TMP27]], [[TMP25]] |
| ; DAZ-NEXT: [[TMP30:%.*]] = sub i32 [[TMP28]], [[TMP24]] |
| ; DAZ-NEXT: [[TMP31:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP30]]) |
| ; DAZ-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0 |
| ; DAZ-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP11]], i64 1 |
| ; DAZ-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP21]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg) |
| %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Same sqrt + fdiv shape with the flags swapped: the sqrt call is `contract` with |
| ; !fpmath !2 (the definition of !2 is outside this chunk), and the fdiv is |
| ; `contract afn` with no !fpmath and numerator <1.0, -1.0, 4.0, poison>. |
| ; Expected output, as pinned by the checks below: |
| ;  - IEEE runs (one shared prefix covers them), both instructions are unchanged. |
| ;  - DAZ run, the sqrt is split into four scalar llvm.amdgcn.sqrt.f32 calls that are |
| ;    reassembled with insertelement, and the `contract afn` fdiv is unchanged. |
| ; No llvm.amdgcn.rsq.f32 or llvm.amdgcn.rcp.f32 call is expected in any run. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div(<4 x float> %arg) { |
| ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div( |
| ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2:![0-9]+]] |
| ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_afn_div( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract afn <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 |
| %partial.rsq = fdiv contract afn <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; The sqrt call is `contract` with !fpmath !2 (the definition of !2 is outside this |
| ; chunk); the fdiv is `contract` only, with no !fpmath and no `afn`, and numerator |
| ; <1.0, -1.0, 4.0, poison>. |
| ; NOTE(review): "correct_fdiv" presumably refers to the fdiv carrying no relaxed |
| ; accuracy annotation -- confirm against the pass. |
| ; Expected output, as pinned by the checks below: |
| ;  - IEEE runs (one shared prefix covers them), both instructions are unchanged. |
| ;  - DAZ run, the sqrt is split into four scalar llvm.amdgcn.sqrt.f32 calls that are |
| ;    reassembled with insertelement, and the fdiv is unchanged. |
| ; No llvm.amdgcn.rsq.f32 or llvm.amdgcn.rcp.f32 call is expected in any run. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv(<4 x float> %arg) { |
| ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv( |
| ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_fdiv( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 |
| %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; The sqrt call is `contract` only, with no !fpmath and no `afn`; the fdiv is |
| ; `contract` with !fpmath !2 (the definition of !2 is outside this chunk) and |
| ; numerator <1.0, -1.0, 4.0, poison>. |
| ; NOTE(review): "correct_sqrt" presumably refers to the sqrt carrying no relaxed |
| ; accuracy annotation -- confirm against the pass. |
| ; Expected output, as pinned by the checks below, where the vector sqrt call is |
| ; kept and no llvm.amdgcn.rsq.f32 call appears in any run: |
| ;  - IEEE runs, lanes 0 and 1 (numerators 1.0 / -1.0) take frexp of the denominator |
| ;    lane (after fneg for the -1.0 lane), llvm.amdgcn.rcp.f32 of the mantissa, and |
| ;    llvm.ldexp with the negated exponent. |
| ;  - IEEE runs, lanes 2 and 3 (numerators 4.0 / poison) use frexp on both operands, |
| ;    llvm.amdgcn.rcp.f32 on the denominator mantissa, and llvm.ldexp with the |
| ;    exponent difference. The GOODFREXP and BADFREXP variants differ only in how |
| ;    the exponent is read (extractvalue of llvm.frexp vs. a separate |
| ;    llvm.amdgcn.frexp.exp.i32.f32 call). |
| ;  - DAZ run, lanes 0 and 1 are a bare llvm.amdgcn.rcp.f32 (with fneg of the input |
| ;    for the -1.0 lane); lanes 2 and 3 use the same frexp/rcp/ldexp sequence. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt(<4 x float> %arg) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( |
| ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = extractvalue { float, i32 } [[TMP32]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( |
| ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg contract float [[TMP2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = extractvalue { float, i32 } [[TMP32]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = fmul contract float [[TMP33]], [[TMP31]] |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = sub i32 [[TMP34]], [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP35]], i32 [[TMP36]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = insertelement <4 x float> [[TMP38]], float [[TMP17]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = insertelement <4 x float> [[TMP39]], float [[TMP27]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP40]], float [[TMP37]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_correct_sqrt( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]) |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = fneg contract float [[TMP2]] |
| ; DAZ-NEXT: [[TMP7:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP8]], 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = extractvalue { float, i32 } [[TMP8]], 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP9]]) |
| ; DAZ-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 4.000000e+00) |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; DAZ-NEXT: [[TMP15:%.*]] = fmul contract float [[TMP13]], [[TMP11]] |
| ; DAZ-NEXT: [[TMP16:%.*]] = sub i32 [[TMP14]], [[TMP10]] |
| ; DAZ-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP15]], i32 [[TMP16]]) |
| ; DAZ-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; DAZ-NEXT: [[TMP21:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; DAZ-NEXT: [[TMP22:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP22]], 0 |
| ; DAZ-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP22]], 1 |
| ; DAZ-NEXT: [[TMP25:%.*]] = fmul contract float [[TMP23]], [[TMP21]] |
| ; DAZ-NEXT: [[TMP26:%.*]] = sub i32 [[TMP24]], [[TMP20]] |
| ; DAZ-NEXT: [[TMP27:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP26]]) |
| ; DAZ-NEXT: [[TMP28:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP29:%.*]] = insertelement <4 x float> [[TMP28]], float [[TMP7]], i64 1 |
| ; DAZ-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP29]], float [[TMP17]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP27]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg) |
| %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Vector division whose denominator is a contract llvm.sqrt.v4f32 call and whose numerator |
| ; mixes 1.0, -1.0, 4.0 and poison. Both the sqrt and the arcp contract fdiv carry |
| ; !fpmath !2 (metadata node defined elsewhere in this file). |
| ; Under the IEEE prefixes the 1.0 and -1.0 lanes are expected to become llvm.amdgcn.rsq.f32 |
| ; on the scaled input element, while the 4.0 and poison lanes go through frexp, |
| ; llvm.amdgcn.rcp.f32 and ldexp before being multiplied by the numerator. The BADFREXP |
| ; variant reads the exponent via llvm.amdgcn.frexp.exp.i32.f32 instead of extractvalue. |
| ; Under the DAZ prefix the sqrt is split into per-lane llvm.amdgcn.sqrt.f32 calls and every |
| ; lane of the division uses a plain llvm.amdgcn.rcp.f32. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( |
| ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = extractvalue { float, i32 } [[TMP21]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( |
| ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = fcmp arcp contract olt float [[TMP5]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = select arcp contract i1 [[TMP9]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fmul arcp contract float [[TMP5]], [[TMP10]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = select arcp contract i1 [[TMP9]], float 4.096000e+03, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = fmul arcp contract float [[TMP12]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = fcmp arcp contract olt float [[TMP6]], 0x3810000000000000 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = select arcp contract i1 [[TMP15]], float 0x4170000000000000, float 1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul arcp contract float [[TMP6]], [[TMP16]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rsq.f32(float [[TMP17]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = select arcp contract i1 [[TMP15]], float -4.096000e+03, float -1.000000e+00 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul arcp contract float [[TMP18]], [[TMP19]] |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP21]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = sub i32 0, [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP25]], i32 [[TMP24]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP26]] |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = fmul arcp contract float poison, [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP14]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP20]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP27]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 |
| ; DAZ-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[DENOM]], i64 0 |
| ; DAZ-NEXT: [[TMP13:%.*]] = extractelement <4 x float> [[DENOM]], i64 1 |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[DENOM]], i64 2 |
| ; DAZ-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[DENOM]], i64 3 |
| ; DAZ-NEXT: [[TMP16:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP12]]) |
| ; DAZ-NEXT: [[TMP17:%.*]] = fneg arcp contract float [[TMP13]] |
| ; DAZ-NEXT: [[TMP18:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP17]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; DAZ-NEXT: [[TMP20:%.*]] = fmul arcp contract float 4.000000e+00, [[TMP19]] |
| ; DAZ-NEXT: [[TMP21:%.*]] = call arcp contract float @llvm.amdgcn.rcp.f32(float [[TMP15]]) |
| ; DAZ-NEXT: [[TMP22:%.*]] = fmul arcp contract float poison, [[TMP21]] |
| ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0 |
| ; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP18]], i64 1 |
| ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP20]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 |
| %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom, !fpmath !2 |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Vector division of the mixed numerator 1.0, -1.0, 4.0, poison by a contract |
| ; llvm.sqrt.v4f32 result. The sqrt carries !fpmath !2 (metadata node defined elsewhere in |
| ; this file) but the arcp contract fdiv has no !fpmath attached. |
| ; The checks expect the vector fdiv to be left as is under every prefix. Under the IEEE |
| ; prefix the sqrt call is also left untouched, and under the DAZ prefix it is split into |
| ; per-lane llvm.amdgcn.sqrt.f32 calls. |
| define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) { |
| ; IEEE-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct( |
| ; IEEE-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-NEXT: [[DENOM:%.*]] = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> [[ARG]]), !fpmath [[META2]] |
| ; IEEE-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; IEEE-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_mixed_constant_numerator_arcp_correct( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.sqrt.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[TMP9]], float [[TMP6]], i64 1 |
| ; DAZ-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP7]], i64 2 |
| ; DAZ-NEXT: [[DENOM:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP8]], i64 3 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = fdiv arcp contract <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[DENOM]] |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %denom = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> %arg), !fpmath !2 |
| %partial.rsq = fdiv contract arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %denom |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Vector division of the mixed numerator 1.0, -1.0, 4.0, poison by the raw argument, using |
| ; an arcp fdiv tagged with !fpmath !2 (metadata node defined elsewhere in this file). |
| ; Under the IEEE prefixes each lane is expected to expand to frexp, llvm.amdgcn.rcp.f32 of |
| ; the frexp fraction and ldexp with the negated exponent. The -1.0 lane negates the |
| ; denominator first, and the 4.0 and poison lanes multiply by the numerator afterwards. |
| ; The BADFREXP variant reads the exponent via llvm.amdgcn.frexp.exp.i32.f32 instead of |
| ; extractvalue. Under the DAZ prefix each lane is a direct llvm.amdgcn.rcp.f32. |
| define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp(<4 x float> %arg) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( |
| ; IEEE-GOODFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP18]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( |
| ; IEEE-BADFREXP-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP1]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = sub i32 0, [[TMP7]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP9]], i32 [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = fneg arcp float [[TMP2]] |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP18]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP3]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 0, [[TMP20]] |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP19]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = fmul arcp float 4.000000e+00, [[TMP23]] |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = sub i32 0, [[TMP27]] |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP26]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP29]], i32 [[TMP28]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = fmul arcp float poison, [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = insertelement <4 x float> poison, float [[TMP10]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> [[TMP32]], float [[TMP17]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP24]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP31]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RCP]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp( |
| ; DAZ-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[ARG]], i64 0 |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[ARG]], i64 1 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[ARG]], i64 2 |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[ARG]], i64 3 |
| ; DAZ-NEXT: [[TMP5:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP1]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = fneg arcp float [[TMP2]] |
| ; DAZ-NEXT: [[TMP7:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP6]]) |
| ; DAZ-NEXT: [[TMP8:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP3]]) |
| ; DAZ-NEXT: [[TMP9:%.*]] = fmul arcp float 4.000000e+00, [[TMP8]] |
| ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP11:%.*]] = fmul arcp float poison, [[TMP10]] |
| ; DAZ-NEXT: [[TMP12:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 |
| ; DAZ-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP7]], i64 1 |
| ; DAZ-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP9]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP11]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RCP]] |
| ; |
| %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg, !fpmath !2 |
| ret <4 x float> %partial.rcp |
| } |
| |
| ; Vector arcp fdiv of the mixed numerator 1.0, -1.0, 4.0, poison by the raw argument, with |
| ; no !fpmath attached. The shared CHECK prefix expects the vector fdiv to be left unchanged. |
| define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct(<4 x float> %arg) { |
| ; CHECK-LABEL: define <4 x float> @rcp_f32_vector_mixed_constant_numerator_arcp_correct( |
| ; CHECK-SAME: <4 x float> [[ARG:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[PARTIAL_RCP:%.*]] = fdiv arcp <4 x float> <float 1.000000e+00, float -1.000000e+00, float 4.000000e+00, float poison>, [[ARG]] |
| ; CHECK-NEXT: ret <4 x float> [[PARTIAL_RCP]] |
| ; |
| %partial.rcp = fdiv arcp <4 x float> <float 1.0, float -1.0, float 4.0, float poison>, %arg |
| ret <4 x float> %partial.rcp |
| } |
| |
| ; Make sure we don't crash if a vector square root has a constant vector input |
| define <4 x float> @rsq_f32_vector_const_denom(ptr addrspace(1) %out, <2 x float> %x) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP12]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP50:%.*]] = extractvalue { float, i32 } [[TMP48]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP50]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP51:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP51]], i32 [[TMP22]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP52:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP53:%.*]] = extractvalue { float, i32 } [[TMP52]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP54:%.*]] = extractvalue { float, i32 } [[TMP52]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP53]], [[TMP28]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP54]], [[TMP31]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = extractvalue { float, i32 } [[TMP35]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @rsq_f32_vector_const_denom( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = extractvalue { float, i32 } [[TMP12]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = sub i32 0, [[TMP14]] |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP16]], i32 [[TMP15]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = fneg contract float [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP48:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP49:%.*]] = extractvalue { float, i32 } [[TMP48]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = sub i32 0, [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[TMP50:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP49]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP50]], i32 [[TMP22]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP30]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP51:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP52:%.*]] = extractvalue { float, i32 } [[TMP51]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP52]], [[TMP28]] |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP35]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP36]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP41:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP42:%.*]] = fmul contract float [[TMP40]], [[TMP38]] |
| ; IEEE-BADFREXP-NEXT: [[TMP43:%.*]] = sub i32 [[TMP41]], [[TMP37]] |
| ; IEEE-BADFREXP-NEXT: [[TMP44:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP42]], i32 [[TMP43]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP45:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP46:%.*]] = insertelement <4 x float> [[TMP45]], float [[TMP24]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP47:%.*]] = insertelement <4 x float> [[TMP46]], float [[TMP34]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP47]], float [[TMP44]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @rsq_f32_vector_const_denom( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 4.000000e+00) |
| ; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 2.000000e+00) |
| ; DAZ-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.sqrt.f32(float 8.000000e+00) |
| ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.sqrt.f32(float poison) |
| ; DAZ-NEXT: [[TMP5:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 |
| ; DAZ-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[TMP2]], i64 1 |
| ; DAZ-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP3]], i64 2 |
| ; DAZ-NEXT: [[SQRT:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP4]], i64 3 |
| ; DAZ-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[SQRT]], i64 0 |
| ; DAZ-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[SQRT]], i64 1 |
| ; DAZ-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[SQRT]], i64 2 |
| ; DAZ-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[SQRT]], i64 3 |
| ; DAZ-NEXT: [[TMP12:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; DAZ-NEXT: [[TMP13:%.*]] = fneg contract float [[TMP9]] |
| ; DAZ-NEXT: [[TMP14:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP13]]) |
| ; DAZ-NEXT: [[TMP15:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP10]]) |
| ; DAZ-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP15]], 0 |
| ; DAZ-NEXT: [[TMP17:%.*]] = extractvalue { float, i32 } [[TMP15]], 1 |
| ; DAZ-NEXT: [[TMP18:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP16]]) |
| ; DAZ-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; DAZ-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; DAZ-NEXT: [[TMP22:%.*]] = fmul contract float [[TMP20]], [[TMP18]] |
| ; DAZ-NEXT: [[TMP23:%.*]] = sub i32 [[TMP21]], [[TMP17]] |
| ; DAZ-NEXT: [[TMP24:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP22]], i32 [[TMP23]]) |
| ; DAZ-NEXT: [[TMP25:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[TMP11]]) |
| ; DAZ-NEXT: [[TMP26:%.*]] = extractvalue { float, i32 } [[TMP25]], 0 |
| ; DAZ-NEXT: [[TMP27:%.*]] = extractvalue { float, i32 } [[TMP25]], 1 |
| ; DAZ-NEXT: [[TMP28:%.*]] = call contract float @llvm.amdgcn.rcp.f32(float [[TMP26]]) |
| ; DAZ-NEXT: [[TMP29:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; DAZ-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP29]], 0 |
| ; DAZ-NEXT: [[TMP31:%.*]] = extractvalue { float, i32 } [[TMP29]], 1 |
| ; DAZ-NEXT: [[TMP32:%.*]] = fmul contract float [[TMP30]], [[TMP28]] |
| ; DAZ-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP27]] |
| ; DAZ-NEXT: [[TMP34:%.*]] = call contract float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP33]]) |
| ; DAZ-NEXT: [[TMP35:%.*]] = insertelement <4 x float> poison, float [[TMP12]], i64 0 |
| ; DAZ-NEXT: [[TMP36:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP14]], i64 1 |
| ; DAZ-NEXT: [[TMP37:%.*]] = insertelement <4 x float> [[TMP36]], float [[TMP24]], i64 2 |
| ; DAZ-NEXT: [[PARTIAL_RSQ:%.*]] = insertelement <4 x float> [[TMP37]], float [[TMP34]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[PARTIAL_RSQ]] |
| ; |
| %sqrt = call contract <4 x float> @llvm.sqrt.v4f32(<4 x float> <float 4.0, float 2.0, float 8.0, float poison>), !fpmath !2 |
| %partial.rsq = fdiv contract <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, %sqrt, !fpmath !2 |
| ret <4 x float> %partial.rsq |
| } |
| |
| ; Vector fdiv where both operands are constant vectors. Lanes 0 and 1 have |
| ; numerators 1.0 and -1.0, lane 2 has a poison numerator, and lane 3 is 2.0 / 10.0. |
| ; The %out and %x arguments are not used by the body. |
| ; Expected output, per the check lines below, is one scalar expansion per lane |
| ; that is reassembled into a <4 x float> with insertelement. |
| ; - The ieee and dynamic denormal runs expand lanes 0 and 1 as frexp, rcp, then ldexp |
| ;   with the negated exponent (lane 1 operates on -2.0), and lanes 2 and 3 as |
| ;   ldexp(mant(num) * rcp(mant(den)), exp(num) - exp(den)). |
| ; - The tahiti run reads exponents with llvm.amdgcn.frexp.exp.i32.f32 instead of |
| ;   extractvalue index 1 of the llvm.frexp result. |
| ; - The preserve-sign run emits a bare llvm.amdgcn.rcp.f32 for lanes 0 and 1 and keeps |
| ;   the frexp based expansion for lanes 2 and 3. |
| define <4 x float> @fdiv_constant_f32_vector(ptr addrspace(1) %out, <2 x float> %x) { |
| ; IEEE-GOODFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP27]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2 |
| ; IEEE-GOODFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3 |
| ; IEEE-GOODFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] |
| ; |
| ; IEEE-BADFREXP-LABEL: define <4 x float> @fdiv_constant_f32_vector( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 5.000000e-01) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 5.000000e-01) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = sub i32 0, [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP5]], i32 [[TMP4]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float -2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float -2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = sub i32 0, [[TMP9]] |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP8]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 [[TMP10]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 3.200000e+01) |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float poison) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 1.000000e+01) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP24]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = extractvalue { float, i32 } [[TMP27]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float 2.000000e+00) |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = fmul float [[TMP28]], [[TMP26]] |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 [[TMP29]], [[TMP25]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP30]], i32 [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = insertelement <4 x float> poison, float [[TMP6]], i64 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = insertelement <4 x float> [[TMP33]], float [[TMP12]], i64 1 |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = insertelement <4 x float> [[TMP34]], float [[TMP22]], i64 2 |
| ; IEEE-BADFREXP-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP35]], float [[TMP32]], i64 3 |
| ; IEEE-BADFREXP-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] |
| ; |
| ; DAZ-LABEL: define <4 x float> @fdiv_constant_f32_vector( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], <2 x float> [[X:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[TMP1:%.*]] = call float @llvm.amdgcn.rcp.f32(float 5.000000e-01) |
| ; DAZ-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.rcp.f32(float -2.000000e+00) |
| ; DAZ-NEXT: [[TMP3:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 3.200000e+01) |
| ; DAZ-NEXT: [[TMP4:%.*]] = extractvalue { float, i32 } [[TMP3]], 0 |
| ; DAZ-NEXT: [[TMP5:%.*]] = extractvalue { float, i32 } [[TMP3]], 1 |
| ; DAZ-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP4]]) |
| ; DAZ-NEXT: [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float poison) |
| ; DAZ-NEXT: [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0 |
| ; DAZ-NEXT: [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1 |
| ; DAZ-NEXT: [[TMP10:%.*]] = fmul float [[TMP8]], [[TMP6]] |
| ; DAZ-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[TMP5]] |
| ; DAZ-NEXT: [[TMP12:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP10]], i32 [[TMP11]]) |
| ; DAZ-NEXT: [[TMP13:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 1.000000e+01) |
| ; DAZ-NEXT: [[TMP14:%.*]] = extractvalue { float, i32 } [[TMP13]], 0 |
| ; DAZ-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP13]], 1 |
| ; DAZ-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP14]]) |
| ; DAZ-NEXT: [[TMP17:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float 2.000000e+00) |
| ; DAZ-NEXT: [[TMP18:%.*]] = extractvalue { float, i32 } [[TMP17]], 0 |
| ; DAZ-NEXT: [[TMP19:%.*]] = extractvalue { float, i32 } [[TMP17]], 1 |
| ; DAZ-NEXT: [[TMP20:%.*]] = fmul float [[TMP18]], [[TMP16]] |
| ; DAZ-NEXT: [[TMP21:%.*]] = sub i32 [[TMP19]], [[TMP15]] |
| ; DAZ-NEXT: [[TMP22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP20]], i32 [[TMP21]]) |
| ; DAZ-NEXT: [[TMP23:%.*]] = insertelement <4 x float> poison, float [[TMP1]], i64 0 |
| ; DAZ-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP2]], i64 1 |
| ; DAZ-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP12]], i64 2 |
| ; DAZ-NEXT: [[CONST_PARTIAL_RCP:%.*]] = insertelement <4 x float> [[TMP25]], float [[TMP22]], i64 3 |
| ; DAZ-NEXT: ret <4 x float> [[CONST_PARTIAL_RCP]] |
| ; |
| ; The !fpmath !2 node is defined outside this chunk. The same node is attached to the |
| ; value named md.1ulp in fdiv_fpmath_f32_nosub_lhs, so it is presumably a 1.0 ulp |
| ; bound - TODO confirm against the metadata definitions. |
| %const.partial.rcp = fdiv <4 x float> <float 1.0, float -1.0, float poison, float 2.0>, <float 0.5, float 2.0, float 32.0, float 10.0>, !fpmath !2 |
| ret <4 x float> %const.partial.rcp |
| } |
| |
| ; Runs a series of scalar f32 fdivs with different !fpmath metadata and fast-math |
| ; flags, where the numerator %a is marked nofpclass(sub). Each quotient is written |
| ; to %out with a volatile store. |
| ; Expected output, per the check lines below |
| ; - No metadata, !fpmath !1, fast, afn, and arcp without metadata are left as fdiv. |
| ; - !fpmath !2 is expanded to frexp, rcp, fmul, sub and ldexp in every configuration. |
| ; - !fpmath !0 and !3 get the same expansion in the ieee and dynamic runs, and become |
| ;   llvm.amdgcn.fdiv.fast calls in the preserve-sign run. |
| ; - arcp with !fpmath becomes %a multiplied by a reciprocal of %b. The reciprocal is |
| ;   frexp scaled in the ieee and dynamic runs and a bare llvm.amdgcn.rcp.f32 in the |
| ;   preserve-sign run. |
| ; The metadata nodes are defined outside this chunk. The value names suggest |
| ; !1 = 0.5 ulp, !2 = 1 ulp, !0 = 2.5 ulp and !3 = 3 ulp - TODO confirm. |
| ; NOTE(review) the expansions still call llvm.frexp on %a despite the nofpclass(sub) |
| ; hint. Whether this output differs from the unannotated fdiv_fpmath_f32 test cannot |
| ; be determined from this chunk. |
| define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs(ptr addrspace(1) %out, float nofpclass(sub) %a, float %b) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_lhs( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float nofpclass(sub) [[A:%.*]], float [[B:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| ; No metadata and !fpmath !1, both expected to stay as plain fdiv. |
| %no.md = fdiv float %a, %b |
| store volatile float %no.md, ptr addrspace(1) %out, align 4 |
| %md.half.ulp = fdiv float %a, %b, !fpmath !1 |
| store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 |
| ; !fpmath cases without fast-math flags, which the checks expect to be rewritten. |
| %md.1ulp = fdiv float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 |
| %md.25ulp = fdiv float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| %md.3ulp = fdiv float %a, %b, !fpmath !3 |
| store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 |
| ; fast and afn variants with !fpmath !0, expected to stay as fdiv. |
| %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 |
| store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 |
| %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 |
| store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 |
| ; arcp variants, first without metadata and then with !fpmath !0 and !2. |
| %no.md.arcp = fdiv arcp float %a, %b |
| store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4 |
| %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 |
| store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 |
| %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 |
| store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, float %a, float nofpclass(sub) %b) { |
| ; IEEE-GOODFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( |
| ; IEEE-GOODFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP16:%.*]] = extractvalue { float, i32 } [[TMP14]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP19]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-GOODFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP30:%.*]] = extractvalue { float, i32 } [[TMP28]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-GOODFREXP-NEXT: [[TMP36:%.*]] = extractvalue { float, i32 } [[TMP34]], 1 |
| ; IEEE-GOODFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-GOODFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-GOODFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-GOODFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-GOODFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-GOODFREXP-NEXT: ret void |
| ; |
| ; IEEE-BADFREXP-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( |
| ; IEEE-BADFREXP-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { |
| ; IEEE-BADFREXP-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; IEEE-BADFREXP-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; IEEE-BADFREXP-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP11]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP14:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP15:%.*]] = extractvalue { float, i32 } [[TMP14]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP17:%.*]] = fmul float [[TMP15]], [[TMP13]] |
| ; IEEE-BADFREXP-NEXT: [[TMP18:%.*]] = sub i32 [[TMP16]], [[TMP12]] |
| ; IEEE-BADFREXP-NEXT: [[MD_25ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP17]], i32 [[TMP18]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP19:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP20:%.*]] = extractvalue { float, i32 } [[TMP19]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP20]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[A]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP26:%.*]] = fmul float [[TMP24]], [[TMP22]] |
| ; IEEE-BADFREXP-NEXT: [[TMP27:%.*]] = sub i32 [[TMP25]], [[TMP21]] |
| ; IEEE-BADFREXP-NEXT: [[MD_3ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP26]], i32 [[TMP27]]) |
| ; IEEE-BADFREXP-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP28:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP29:%.*]] = extractvalue { float, i32 } [[TMP28]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP30:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP31:%.*]] = sub i32 0, [[TMP30]] |
| ; IEEE-BADFREXP-NEXT: [[TMP32:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP29]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP33:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP32]], i32 [[TMP31]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP33]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: [[TMP34:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP35:%.*]] = extractvalue { float, i32 } [[TMP34]], 0 |
| ; IEEE-BADFREXP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float [[B]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP37:%.*]] = sub i32 0, [[TMP36]] |
| ; IEEE-BADFREXP-NEXT: [[TMP38:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[TMP35]]) |
| ; IEEE-BADFREXP-NEXT: [[TMP39:%.*]] = call arcp float @llvm.ldexp.f32.i32(float [[TMP38]], i32 [[TMP37]]) |
| ; IEEE-BADFREXP-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP39]] |
| ; IEEE-BADFREXP-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; IEEE-BADFREXP-NEXT: ret void |
| ; |
| ; DAZ-LABEL: define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs( |
| ; DAZ-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float nofpclass(sub) [[B:%.*]]) #[[ATTR1]] { |
| ; DAZ-NEXT: [[NO_MD:%.*]] = fdiv float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_HALF_ULP:%.*]] = fdiv float [[A]], [[B]], !fpmath [[META1]] |
| ; DAZ-NEXT: store volatile float [[MD_HALF_ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP1:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[B]]) |
| ; DAZ-NEXT: [[TMP2:%.*]] = extractvalue { float, i32 } [[TMP1]], 0 |
| ; DAZ-NEXT: [[TMP3:%.*]] = extractvalue { float, i32 } [[TMP1]], 1 |
| ; DAZ-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[TMP2]]) |
| ; DAZ-NEXT: [[TMP5:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[A]]) |
| ; DAZ-NEXT: [[TMP6:%.*]] = extractvalue { float, i32 } [[TMP5]], 0 |
| ; DAZ-NEXT: [[TMP7:%.*]] = extractvalue { float, i32 } [[TMP5]], 1 |
| ; DAZ-NEXT: [[TMP8:%.*]] = fmul float [[TMP6]], [[TMP4]] |
| ; DAZ-NEXT: [[TMP9:%.*]] = sub i32 [[TMP7]], [[TMP3]] |
| ; DAZ-NEXT: [[MD_1ULP:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 [[TMP9]]) |
| ; DAZ-NEXT: store volatile float [[MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_25ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[MD_3ULP:%.*]] = call float @llvm.amdgcn.fdiv.fast(float [[A]], float [[B]]) |
| ; DAZ-NEXT: store volatile float [[MD_3ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[FAST_MD_25ULP:%.*]] = fdiv fast float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[FAST_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[AFN_MD_25ULP:%.*]] = fdiv afn float [[A]], [[B]], !fpmath [[META0]] |
| ; DAZ-NEXT: store volatile float [[AFN_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[NO_MD_ARCP:%.*]] = fdiv arcp float [[A]], [[B]] |
| ; DAZ-NEXT: store volatile float [[NO_MD_ARCP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP10:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_25ULP:%.*]] = fmul arcp float [[A]], [[TMP10]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_25ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: [[TMP11:%.*]] = call arcp float @llvm.amdgcn.rcp.f32(float [[B]]) |
| ; DAZ-NEXT: [[ARCP_MD_1ULP:%.*]] = fmul arcp float [[A]], [[TMP11]] |
| ; DAZ-NEXT: store volatile float [[ARCP_MD_1ULP]], ptr addrspace(1) [[OUT]], align 4 |
| ; DAZ-NEXT: ret void |
| ; |
| %no.md = fdiv float %a, %b |
| store volatile float %no.md, ptr addrspace(1) %out, align 4 |
| %md.half.ulp = fdiv float %a, %b, !fpmath !1 |
| store volatile float %md.half.ulp, ptr addrspace(1) %out, align 4 |
| %md.1ulp = fdiv float %a, %b, !fpmath !2 |
| store volatile float %md.1ulp, ptr addrspace(1) %out, align 4 |
| %md.25ulp = fdiv float %a, %b, !fpmath !0 |
| store volatile float %md.25ulp, ptr addrspace(1) %out, align 4 |
| %md.3ulp = fdiv float %a, %b, !fpmath !3 |
| store volatile float %md.3ulp, ptr addrspace(1) %out, align 4 |
| %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0 |
| store volatile float %fast.md.25ulp, ptr addrspace(1) %out, align 4 |
| %afn.md.25ulp = fdiv afn float %a, %b, !fpmath !0 |
| store volatile float %afn.md.25ulp, ptr addrspace(1) %out, align 4 |
| %no.md.arcp = fdiv arcp float %a, %b |
| store volatile float %no.md.arcp, ptr addrspace(1) %out, align 4 |
| %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0 |
| store volatile float %arcp.md.25ulp, ptr addrspace(1) %out, align 4 |
| %arcp.md.1ulp = fdiv arcp float %a, %b, !fpmath !2 |
| store volatile float %arcp.md.1ulp, ptr addrspace(1) %out, align 4 |
| ret void |
| } |
| |
| ; Intrinsic declarations: scalar fabs, sqrt for float, <2 x float> and |
| ; <4 x float>, and llvm.assume. Their call sites are presumably in test |
| ; functions elsewhere in this file (not visible from this section). |
| declare float @llvm.sqrt.f32(float) |
| declare float @llvm.fabs.f32(float) |
| declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) |
| declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) |
| declare void @llvm.assume(i1 noundef) |
| |
| ; Attribute group #0 (optnone + noinline) is attached to @noop_fdiv_fpmath at |
| ; the top of this file, whose expected output keeps the fdiv and its !fpmath |
| ; annotation unchanged. |
| attributes #0 = { optnone noinline } |
| |
| ; Operands for !fpmath, which gives the maximum acceptable error of the |
| ; annotated instruction's result in ULPs: |
| ;   !0 = 2.5 ulp (%md.25ulp, %fast.md.25ulp, %afn.md.25ulp, %arcp.md.25ulp) |
| ;   !1 = 0.5 ulp (%md.half.ulp) |
| ;   !2 = 1.0 ulp (%md.1ulp, %arcp.md.1ulp) |
| ;   !3 = 3.0 ulp (%md.3ulp) |
| !0 = !{float 2.500000e+00} |
| !1 = !{float 5.000000e-01} |
| !2 = !{float 1.000000e+00} |
| !3 = !{float 3.000000e+00} |