| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | 
 | # RUN: llc -O2 -mtriple=x86_64-unknown-unknown -mattr=+amx-tile,+amx-bf16,+avx512f, \ | 
 | # RUN: -mattr=+amx-transpose -run-pass=greedy,tileconfig -o - %s | FileCheck %s | 
 |  | 
 | --- | | 
 |   ; LLVM IR counterpart of the machine function in the next YAML document. | 
 |   ; Source and destination buffers for the tile load and tile store below. | 
 |   @buf = dso_local global [2048 x i8] zeroinitializer, align 16 | 
 |   @buf2 = dso_local global [2048 x i8] zeroinitializer, align 16 | 
 |  | 
 |   ; Loads a pair of tiles from @buf with t2rpntlvwz0, zeroes a third tile, | 
 |   ; passes all three to tdpbssd, and stores the resulting tile to @buf2. | 
 |   define dso_local void @test_tile_2rpntlvwz0(i16 noundef signext %row, i16 noundef signext %col0, i16 noundef signext %col1) local_unnamed_addr #0 { | 
 |   entry: | 
 |     %0 = tail call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16 %row, i16 %col0, i16 %col1, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf, i64 0, i64 0), i64 32) #5 | 
 |     ; Split the returned struct into its two x86_amx members. | 
 |     %1 = extractvalue { x86_amx, x86_amx } %0, 0 | 
 |     %2 = extractvalue { x86_amx, x86_amx } %0, 1 | 
 |     %3 = tail call x86_amx @llvm.x86.tilezero.internal(i16 %row, i16 %col0) #5 | 
 |     ; The i16 operands here are (%row, %col1, %col0), i.e. the two column values | 
 |     ; appear in the opposite order from the t2rpntlvwz0 call above. | 
 |     %4 = tail call x86_amx @llvm.x86.tdpbssd.internal(i16 %row, i16 %col1, i16 %col0, x86_amx %3, x86_amx %1, x86_amx %2) #5 | 
 |     tail call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col0, i8* getelementptr inbounds ([2048 x i8], [2048 x i8]* @buf2, i64 0, i64 0), i64 32, x86_amx %4) #5 | 
 |     ret void | 
 |   } | 
 |  | 
 |   declare { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal(i16, i16, i16, i8*, i64) #1 | 
 |  | 
 |   ; Declared but not called by @test_tile_2rpntlvwz0. | 
 |   declare <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx) #2 | 
 |  | 
 |   declare x86_amx @llvm.x86.tilezero.internal(i16, i16) #3 | 
 |  | 
 |   declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx) #3 | 
 |  | 
 |   ; Declared but not called by @test_tile_2rpntlvwz0. | 
 |   declare x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32>) #2 | 
 |  | 
 |   declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx) #4 | 
 |  | 
 |   ; #0 is the attribute group of the function definition, #1-#4 are used by | 
 |   ; the intrinsic declarations, and #5 is attached to every call site. | 
 |   attributes #0 = { nounwind uwtable "frame-pointer"="all" "min-legal-vector-width"="8192" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+amx-bf16,+amx-int8,+amx-tile,+amx-transpose,+avx,+avx2,+avx512f,+crc32,+cx8,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+amx-tile,+amx-bf16,+avx512f,+amx-transpose" "tune-cpu"="generic" } | 
 |   attributes #1 = { argmemonly nounwind readonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } | 
 |   attributes #2 = { nounwind readnone "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } | 
 |   attributes #3 = { nounwind "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } | 
 |   attributes #4 = { argmemonly nounwind writeonly "target-features"="+amx-tile,+amx-bf16,+avx512f,+amx-transpose" } | 
 |   attributes #5 = { nounwind } | 
 |  | 
 | ... | 
 | --- | 
 | # Machine function for @test_tile_2rpntlvwz0. Its body is the input to the | 
 | # passes named on the llc command line at the top of this file. | 
 | name:            test_tile_2rpntlvwz0 | 
 | alignment:       16 | 
 | exposesReturnsTwice: false | 
 | legalized:       false | 
 | regBankSelected: false | 
 | selected:        false | 
 | failedISel:      false | 
 | tracksRegLiveness: true | 
 | hasWinCFI:       false | 
 | callsEHReturn:   false | 
 | callsUnwindInit: false | 
 | hasEHContTarget: false | 
 | hasEHScopes:     false | 
 | hasEHFunclets:   false | 
 | failsVerification: false | 
 | tracksDebugUserValues: false | 
 | # Virtual register table. The body references %0-%2 (argument copies), | 
 | # %6, %7 and %13 (addresses and the constant 32), %8 (tile pair), %12 (tile) | 
 | # and %14 (zeroed 512-bit vector). Ids 3-5 and 9-11 are declared here but do | 
 | # not appear in the body. | 
 | registers: | 
 |   - { id: 0, class: gr32, preferred-register: '' } | 
 |   - { id: 1, class: gr32, preferred-register: '' } | 
 |   - { id: 2, class: gr32, preferred-register: '' } | 
 |   - { id: 3, class: gr16, preferred-register: '' } | 
 |   - { id: 4, class: gr16, preferred-register: '' } | 
 |   - { id: 5, class: gr16, preferred-register: '' } | 
 |   - { id: 6, class: gr64, preferred-register: '' } | 
 |   - { id: 7, class: gr64_nosp, preferred-register: '' } | 
 |   - { id: 8, class: tilepair, preferred-register: '' } | 
 |   - { id: 9, class: tile, preferred-register: '' } | 
 |   - { id: 10, class: tile, preferred-register: '' } | 
 |   - { id: 11, class: tile, preferred-register: '' } | 
 |   - { id: 12, class: tile, preferred-register: '' } | 
 |   - { id: 13, class: gr64, preferred-register: '' } | 
 |   - { id: 14, class: vr512, preferred-register: '' } | 
 | # The three incoming arguments arrive in $edi, $esi and $edx. | 
 | liveins: | 
 |   - { reg: '$edi', virtual-reg: '%0' } | 
 |   - { reg: '$esi', virtual-reg: '%1' } | 
 |   - { reg: '$edx', virtual-reg: '%2' } | 
 | frameInfo: | 
 |   isFrameAddressTaken: false | 
 |   isReturnAddressTaken: false | 
 |   hasStackMap:     false | 
 |   hasPatchPoint:   false | 
 |   stackSize:       0 | 
 |   offsetAdjustment: 0 | 
 |   maxAlignment:    4 | 
 |   adjustsStack:    false | 
 |   hasCalls:        false | 
 |   stackProtector:  '' | 
 |   functionContext: '' | 
 |   maxCallFrameSize: 4294967295 | 
 |   cvBytesOfCalleeSavedRegisters: 0 | 
 |   hasOpaqueSPAdjustment: false | 
 |   hasVAStart:      false | 
 |   hasMustTailInVarArgFunc: false | 
 |   hasTailCall:     false | 
 |   localFrameSize:  0 | 
 |   savePoint:       '' | 
 |   restorePoint:    '' | 
 | fixedStack:      [] | 
 | # Single 64-byte slot: the body zero-fills it, writes into it, and passes it | 
 | # to PLDTILECFGV. | 
 | stack: | 
 |   - { id: 0, name: '', type: default, offset: 0, size: 64, alignment: 4, | 
 |       stack-id: default, callee-saved-register: '', callee-saved-restored: true, | 
 |       debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } | 
 | callSites:       [] | 
 | debugValueSubstitutions: [] | 
 | constants:       [] | 
 | machineFunctionInfo: | 
 |   amxProgModel: ManagedRA | 
 | body:             | | 
 |   bb.0.entry: | 
 |     liveins: $edi, $esi, $edx | 
 |  | 
 |  | 
 |     ; Expected output of the passes. Compared with the input instructions | 
 |     ; further down, the only difference is six extra stores into %stack.0 | 
 |     ; (16-bit stores at offsets 26, 24 and 16; 8-bit stores at offsets 53, 52 | 
 |     ; and 48) placed between the MOV8mi and PLDTILECFGV. | 
 |     ; NOTE(review): presumably these are the per-tile column and row fields of | 
 |     ; the tile configuration - confirm against the LDTILECFG memory layout. | 
 |     ; CHECK-LABEL: name: test_tile_2rpntlvwz0 | 
 |     ; CHECK: liveins: $edi, $esi, $edx | 
 |     ; CHECK-NEXT: {{  $}} | 
 |     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx | 
 |     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $esi | 
 |     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY $edi | 
 |     ; CHECK-NEXT: [[AVX512_512_SET0_:%[0-9]+]]:vr512 = AVX512_512_SET0 | 
 |     ; CHECK-NEXT: VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, [[AVX512_512_SET0_]] :: (store (s512) into %stack.0, align 4) | 
 |     ; CHECK-NEXT: MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) | 
 |     ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 26, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 26, align 2, basealign 4) | 
 |     ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 53, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 53, align 1, basealign 4) | 
 |     ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 24, $noreg, [[COPY1]].sub_16bit :: (store (s512) into %stack.0 + 24, align 4) | 
 |     ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 52, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 52, align 4) | 
 |     ; CHECK-NEXT: MOV16mr %stack.0, 1, $noreg, 16, $noreg, [[COPY]].sub_16bit :: (store (s512) into %stack.0 + 16, align 4) | 
 |     ; CHECK-NEXT: MOV8mr %stack.0, 1, $noreg, 48, $noreg, [[COPY2]].sub_8bit :: (store (s512) into %stack.0 + 48, align 4) | 
 |     ; CHECK-NEXT: PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) | 
 |     ; CHECK-NEXT: [[MOV32ri64_:%[0-9]+]]:gr64 = MOV32ri64 @buf | 
 |     ; CHECK-NEXT: [[MOV32ri64_1:%[0-9]+]]:gr64_nosp = MOV32ri64 32 | 
 |     ; CHECK-NEXT: [[PT2RPNTLVWZ0V:%[0-9]+]]:tilepair = PT2RPNTLVWZ0V [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[COPY]].sub_16bit, [[MOV32ri64_]], 1, [[MOV32ri64_1]], 0, $noreg | 
 |     ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit | 
 |     ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTDPBSSDV [[COPY2]].sub_16bit, [[COPY]].sub_16bit, [[COPY1]].sub_16bit, [[PTILEZEROV]], [[PT2RPNTLVWZ0V]].sub_t0, [[PT2RPNTLVWZ0V]].sub_t1 | 
 |     ; CHECK-NEXT: [[MOV32ri64_2:%[0-9]+]]:gr64 = MOV32ri64 @buf2 | 
 |     ; CHECK-NEXT: PTILESTOREDV [[COPY2]].sub_16bit, [[COPY1]].sub_16bit, [[MOV32ri64_2]], 1, [[MOV32ri64_1]], 0, $noreg, [[PTILEZEROV]] | 
 |     ; CHECK-NEXT: RET 0 | 
 |     ; Input instructions: copy the three arguments out of their physical registers. | 
 |     %2:gr32 = COPY $edx | 
 |     %1:gr32 = COPY $esi | 
 |     %0:gr32 = COPY $edi | 
 |     ; Zero all of %stack.0, write 1 to its first byte, then hand it to PLDTILECFGV. | 
 |     %14:vr512 = AVX512_512_SET0 | 
 |     VMOVUPSZmr %stack.0, 1, $noreg, 0, $noreg, %14 :: (store (s512) into %stack.0, align 4) | 
 |     MOV8mi %stack.0, 1, $noreg, 0, $noreg, 1 :: (store (s512) into %stack.0, align 4) | 
 |     PLDTILECFGV %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $tmm0, implicit-def dead $tmm1, implicit-def dead $tmm2, implicit-def dead $tmm3, implicit-def dead $tmm4, implicit-def dead $tmm5, implicit-def dead $tmm6, implicit-def dead $tmm7 :: (load (s512) from %stack.0, align 4) | 
 |     ; Load the tile pair %8 from @buf; %7 (the constant 32) is the index register. | 
 |     %6:gr64 = MOV32ri64 @buf | 
 |     %7:gr64_nosp = MOV32ri64 32 | 
 |     %8:tilepair = PT2RPNTLVWZ0V %0.sub_16bit, %1.sub_16bit, %2.sub_16bit, %6, 1, %7, 0, $noreg | 
 |     ; %12 is defined by PTILEZEROV and then redefined by PTDPBSSDV, which reads | 
 |     ; the old %12 together with both halves of %8 (sub_t0 and sub_t1). | 
 |     %12:tile = PTILEZEROV %0.sub_16bit, %1.sub_16bit | 
 |     %12:tile = PTDPBSSDV %0.sub_16bit, %2.sub_16bit, %1.sub_16bit, %12, %8.sub_t0, %8.sub_t1 | 
 |     ; Store the final %12 to @buf2, reusing %7 as the index register. | 
 |     %13:gr64 = MOV32ri64 @buf2 | 
 |     PTILESTOREDV %0.sub_16bit, %1.sub_16bit, %13, 1, %7, 0, $noreg, %12 | 
 |     RET 0 | 
 |  | 
 | ... |