; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=instcombine < %s | FileCheck %s

; --------------------------------------------------------------------
; tensor_load_to_lds: D2 and D3 are zero/poison -> convert to _d2 variant
; --------------------------------------------------------------------

; Both D2 and D3 are zeroinitializer -> fold to the compact .d2 intrinsic.
define void @test_tensor_load_to_lds_d2_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; Both D2 and D3 are poison -> also foldable to the .d2 variant.
define void @test_tensor_load_to_lds_d2_d3_poison(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_d3_poison(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> poison, <4 x i32> poison, i32 0)
  ret void
}

; Mixed zero/poison (D2 = zero, D3 = poison) still folds to .d2.
define void @test_tensor_load_to_lds_d2_zero_d3_poison(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_zero_d3_poison(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> poison, i32 0)
  ret void
}

; Mixed zero/poison (D2 = poison, D3 = zero) still folds to .d2.
define void @test_tensor_load_to_lds_d2_poison_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_poison_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> poison, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; --------------------------------------------------------------------
; non-matching patterns for tensor_load_to_lds simplification
; --------------------------------------------------------------------

; Negative test: D3 is a live value, so the call must NOT be simplified.
define void @test_tensor_load_to_lds_d2_zero_d3_nonzero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d3) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_zero_d3_nonzero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D3:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> zeroinitializer, <4 x i32> [[D3]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> %d3, i32 0)
  ret void
}

; Negative test: D2 is a live value, so the call must NOT be simplified.
define void @test_tensor_load_to_lds_d2_nonzero_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d2) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_nonzero_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D2:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> [[D2]], <4 x i32> zeroinitializer, i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> %d2, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; Negative test: both D2 and D3 are live values; no simplification.
define void @test_tensor_load_to_lds_d2_d3_nonzero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d2, <4 x i32> inreg %d3) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_d3_nonzero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D2:%.*]], <4 x i32> inreg [[D3:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> [[D2]], <4 x i32> [[D3]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> %d2, <4 x i32> %d3, i32 0)
  ret void
}

; --------------------------------------------------------------------
; tensor_store_from_lds: D2 and D3 are zero/poison -> convert to _d2 variant
; --------------------------------------------------------------------

; Store direction: D2 == D3 == zeroinitializer -> fold to the .d2 intrinsic.
define void @test_tensor_store_from_lds_d2_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; Store direction: both D2 and D3 poison -> fold to the .d2 intrinsic.
define void @test_tensor_store_from_lds_d2_d3_poison(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_d3_poison(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> poison, <4 x i32> poison, i32 0)
  ret void
}

; Store direction: mixed D2 = zero, D3 = poison -> still folds to .d2.
define void @test_tensor_store_from_lds_d2_zero_d3_poison(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_zero_d3_poison(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> poison, i32 0)
  ret void
}

; Store direction: mixed D2 = poison, D3 = zero -> still folds to .d2.
define void @test_tensor_store_from_lds_d2_poison_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_poison_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> poison, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; --------------------------------------------------------------------
; non-matching patterns for tensor_store_from_lds simplification
; --------------------------------------------------------------------

; Negative test: D3 is a live value, so the store call must NOT be simplified.
define void @test_tensor_store_from_lds_d2_zero_d3_nonzero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d3) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_zero_d3_nonzero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D3:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> zeroinitializer, <4 x i32> [[D3]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> %d3, i32 0)
  ret void
}

; Negative test: D2 is a live value, so the store call must NOT be simplified.
define void @test_tensor_store_from_lds_d2_nonzero_d3_zero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d2) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_nonzero_d3_zero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D2:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> [[D2]], <4 x i32> zeroinitializer, i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> %d2, <4 x i32> zeroinitializer, i32 0)
  ret void
}

; Negative test: both D2 and D3 are live values; no simplification.
define void @test_tensor_store_from_lds_d2_d3_nonzero(<4 x i32> inreg %d0, <8 x i32> inreg %d1, <4 x i32> inreg %d2, <4 x i32> inreg %d3) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_d3_nonzero(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]], <4 x i32> inreg [[D2:%.*]], <4 x i32> inreg [[D3:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> [[D0]], <8 x i32> [[D1]], <4 x i32> [[D2]], <4 x i32> [[D3]], i32 0)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> %d2, <4 x i32> %d3, i32 0)
  ret void
}

; --------------------------------------------------------------------
; ensure cachepolicy is preserved
; --------------------------------------------------------------------

; The fold must carry the non-zero cachepolicy immediate (i32 1) through
; to the .d2 variant unchanged.
define void @test_tensor_load_to_lds_d2_d3_zero_cachepolicy(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_load_to_lds_d2_d3_zero_cachepolicy(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.load.to.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.load.to.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1)
  ret void
}

; Same cachepolicy-preservation check for the store direction.
define void @test_tensor_store_from_lds_d2_d3_zero_cachepolicy(<4 x i32> inreg %d0, <8 x i32> inreg %d1) {
; CHECK-LABEL: define void @test_tensor_store_from_lds_d2_d3_zero_cachepolicy(
; CHECK-SAME: <4 x i32> inreg [[D0:%.*]], <8 x i32> inreg [[D1:%.*]]) {
; CHECK-NEXT:    call void @llvm.amdgcn.tensor.store.from.lds.d2(<4 x i32> [[D0]], <8 x i32> [[D1]], i32 1)
; CHECK-NEXT:    ret void
;
  call void @llvm.amdgcn.tensor.store.from.lds(<4 x i32> %d0, <8 x i32> %d1, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer, i32 1)
  ret void
}

; Declarations for the tested AMDGPU intrinsics (4-dim forms with immarg cachepolicy).
declare void @llvm.amdgcn.tensor.load.to.lds(<4 x i32>, <8 x i32>, <4 x i32>, <4 x i32>, i32 immarg)
declare void @llvm.amdgcn.tensor.store.from.lds(<4 x i32>, <8 x i32>, <4 x i32>, <4 x i32>, i32 immarg)