| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5 |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx900 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX900 %s |
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1010 -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCN-GFX1010 %s |
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefix=AMDGCNSPIRV %s |
| |
| // Test that, depending on triple and, if applicable, target-cpu, one of three |
| // things happens: |
| // 1) for gfx900 we emit a call to trap (concrete target, matches) |
| // 2) for gfx1010 we emit an empty function (concrete target, does not match) |
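| // 3) for AMDGCNSPIRV we emit, for each queried processor name, a boolean |
| // specialisation constant, via a call to the |
| // llvm.spv.named.boolean.spec.constant intrinsic, with the id of UINT32_MAX |
| // (i32 -1), the boolean value of false, and a metadata operand naming the |
| // query (e.g. "is.gfx900"), which will yield an OpSpecConstantFalse in SPIR-V |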
| // AMDGCN-GFX900-LABEL: define dso_local void @foo( |
| // AMDGCN-GFX900-SAME: ) #[[ATTR0:[0-9]+]] { |
| // AMDGCN-GFX900-NEXT: [[ENTRY:.*:]] |
| // AMDGCN-GFX900-NEXT: call void @llvm.trap() |
| // AMDGCN-GFX900-NEXT: ret void |
| // |
| // AMDGCN-GFX1010-LABEL: define dso_local void @foo( |
| // AMDGCN-GFX1010-SAME: ) #[[ATTR0:[0-9]+]] { |
| // AMDGCN-GFX1010-NEXT: [[ENTRY:.*:]] |
| // AMDGCN-GFX1010-NEXT: ret void |
| // |
| // AMDGCNSPIRV-LABEL: define spir_func void @foo( |
| // AMDGCNSPIRV-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { |
| // AMDGCNSPIRV-NEXT: [[ENTRY:.*:]] |
| // AMDGCNSPIRV-NEXT: [[TMP0:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META2:![0-9]+]]) |
| // AMDGCNSPIRV-NEXT: [[TOBOOL:%.*]] = icmp ne i1 [[TMP0]], false |
| // AMDGCNSPIRV-NEXT: br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[LOR_LHS_FALSE:.*]] |
| // AMDGCNSPIRV: [[LOR_LHS_FALSE]]: |
| // AMDGCNSPIRV-NEXT: [[TMP1:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META3:![0-9]+]]) |
| // AMDGCNSPIRV-NEXT: [[TOBOOL1:%.*]] = icmp ne i1 [[TMP1]], false |
| // AMDGCNSPIRV-NEXT: br i1 [[TOBOOL1]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE2:.*]] |
| // AMDGCNSPIRV: [[LOR_LHS_FALSE2]]: |
| // AMDGCNSPIRV-NEXT: [[TMP2:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META4:![0-9]+]]) |
| // AMDGCNSPIRV-NEXT: [[TOBOOL3:%.*]] = icmp ne i1 [[TMP2]], false |
| // AMDGCNSPIRV-NEXT: br i1 [[TOBOOL3]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE4:.*]] |
| // AMDGCNSPIRV: [[LOR_LHS_FALSE4]]: |
| // AMDGCNSPIRV-NEXT: [[TMP3:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META5:![0-9]+]]) |
| // AMDGCNSPIRV-NEXT: [[TOBOOL5:%.*]] = icmp ne i1 [[TMP3]], false |
| // AMDGCNSPIRV-NEXT: br i1 [[TOBOOL5]], label %[[IF_THEN]], label %[[LOR_LHS_FALSE6:.*]] |
| // AMDGCNSPIRV: [[LOR_LHS_FALSE6]]: |
| // AMDGCNSPIRV-NEXT: [[TMP4:%.*]] = call addrspace(4) i1 @llvm.spv.named.boolean.spec.constant(i32 -1, i1 false, metadata [[META6:![0-9]+]]) |
| // AMDGCNSPIRV-NEXT: [[TOBOOL7:%.*]] = icmp ne i1 [[TMP4]], false |
| // AMDGCNSPIRV-NEXT: br i1 [[TOBOOL7]], label %[[IF_THEN]], label %[[IF_END:.*]] |
| // AMDGCNSPIRV: [[IF_THEN]]: |
| // AMDGCNSPIRV-NEXT: call addrspace(4) void @llvm.trap() |
| // AMDGCNSPIRV-NEXT: br label %[[IF_END]] |
| // AMDGCNSPIRV: [[IF_END]]: |
| // AMDGCNSPIRV-NEXT: ret void |
| // |
// Function under test: traps when any of five __builtin_amdgcn_processor_is
// queries (gfx900, gfx906, gfx90c, gfx90a, gfx942) holds, otherwise returns.
// Per the autogenerated checks preceding this function, the gfx900 run keeps
// only the trap call, the gfx1010 run keeps only the return, and the
// spirv64-amd-amdhsa run emits one llvm.spv.named.boolean.spec.constant call
// per query, chained with short-circuit branches in source order.
| void foo() { |
| if (__builtin_amdgcn_processor_is("gfx900") || |
| __builtin_amdgcn_processor_is("gfx906") || |
| __builtin_amdgcn_processor_is("gfx90c") || |
// The last two queries are wrapped in redundant parentheses; the checks show
// them lowered the same way as the unparenthesised calls.
| (__builtin_amdgcn_processor_is("gfx90a")) || |
| (__builtin_amdgcn_processor_is("gfx942"))) |
// NOTE(review): this returns a void expression from a void function; the RUN
// lines rely on clang accepting that form -- confirm it is intentional before
// "tidying" it, since the expected IR is pinned by the checks.
| return __builtin_trap(); |
| } |
| //. |
| // AMDGCN-GFX900: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx900" } |
| // AMDGCN-GFX900: attributes #[[ATTR1:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } |
| //. |
| // AMDGCN-GFX1010: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1010" } |
| //. |
| // AMDGCNSPIRV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+add-min-max-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-fmin-fmax-global-f32,+atomic-fmin-fmax-global-f64,+atomic-global-pk-add-bf16-inst,+bf16-cvt-insts,+bf16-pk-insts,+bf16-trans-insts,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+clusters,+cube-insts,+cvt-pknorm-vop2-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+fp8e5m3-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx1250-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+lerp-inst,+mai-insts,+mcast-load-insts,+permlane16-swap,+permlane32-swap,+pk-add-min-max-insts,+prng-inst,+qsad-insts,+s-memrealtime,+s-memtime-inst,+s-wakeup-barrier-inst,+sad-insts,+setprio-inc-wg-inst,+swmmac-gfx1200-insts,+swmmac-gfx1250-insts,+tanh-insts,+tensor-cvt-lut-insts,+transpose-load-f4f6-insts,+vmem-pref-insts,+vmem-to-lds-load-insts,+wavefrontsize32,+wavefrontsize64,+wmma-128b-insts,+wmma-256b-insts,+xf32-insts" } |
| // AMDGCNSPIRV: attributes #[[ATTR1:[0-9]+]] = { nounwind } |
| // AMDGCNSPIRV: attributes #[[ATTR2:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) } |
| //. |
| // AMDGCN-GFX900: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} |
| // AMDGCN-GFX900: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} |
| //. |
| // AMDGCN-GFX1010: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} |
| // AMDGCN-GFX1010: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} |
| //. |
| // AMDGCNSPIRV: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600} |
| // AMDGCNSPIRV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} |
| // AMDGCNSPIRV: [[META2]] = !{!"is.gfx900"} |
| // AMDGCNSPIRV: [[META3]] = !{!"is.gfx906"} |
| // AMDGCNSPIRV: [[META4]] = !{!"is.gfx90c"} |
| // AMDGCNSPIRV: [[META5]] = !{!"is.gfx90a"} |
| // AMDGCNSPIRV: [[META6]] = !{!"is.gfx942"} |
| //. |