| //=- AArch64SchedOlympus.td - Olympus Scheduling Defs --------*- tablegen -*-=// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file defines the scheduling model for the NVIDIA Olympus processor. |
| // All information is taken from the Olympus Software Optimisation guide: |
| // |
| // https://docs.nvidia.com/olympus-cpu-core-software-optimization-guide-dp12531-001v0-7.pdf |
| // |
| //===----------------------------------------------------------------------===// |
| |
| // Top-level machine model record for the Olympus core. |
| // NOTE: Any value the Olympus SWOG leaves unspecified is copied from the |
| // Neoverse V2 model, unless the comment on that field names another source. |
| def OlympusModel : SchedMachineModel { |
|   let CompleteModel = 1; |
| |
|   // Maximum macro-ops dispatched per cycle. |
|   let IssueWidth = 10; |
|   // Entries in micro-op re-order buffer. |
|   let MicroOpBufferSize = 320; |
|   // NOTE: Copied from Cortex-A57. |
|   let LoopMicroOpBufferSize = 16; |
| |
|   // Optimistic load latency. |
|   let LoadLatency = 4; |
|   // Extra cycles for mispredicted branch. |
|   let MispredictPenalty = 10; |
| |
|   // SME (via SMEUnsupported.F) plus the individual features listed here are |
|   // declared unsupported by this model. |
|   list<Predicate> UnsupportedFeatures = |
|       !listconcat(SMEUnsupported.F, |
|                   [HasSVE2p1, HasSVEB16B16, HasCPA, HasCSSC, HasMatMulFP64]); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // In the Olympus core, instructions are first fetched and decoded into |
| // internal macro-ops (MOps). Those MOps are then renamed and dispatched to the |
| // out-of-order portion of the core. A MOp can be split into two micro-ops |
| // (uOps) further down the pipeline after instruction decode. Once dispatched, |
| // a uOp waits for its operands to become available and issues out-of-order to |
| // one of many execution pipelines. Each execution pipeline can accept one uOp |
| // per cycle. |
| |
| let SchedModel = OlympusModel in { |
| |
| // Note: The RCU is not specified in the SWOG, therefore we assume we can commit |
| // as many MOps as we can dispatch each cycle. The retire width is read from |
| // OlympusModel.IssueWidth (instead of repeating its literal value) so that the |
| // assumption above keeps holding if the dispatch width is ever revised. |
| def OlympusRCU : RetireControlUnit<OlympusModel.MicroOpBufferSize, |
|                                    OlympusModel.IssueWidth>; |
| |
| // Define the issue ports. |
| // Each ProcResource<N> record below covers N pipes of one kind; the trailing |
| // comment on each line lists the pipe numbers it stands for. |
| def OlympusUnitB : ProcResource<4>; // Branch 0/1/2/3 |
| def OlympusUnitS : ProcResource<6>; // Integer single-cycle 0/1/2/3/4/5 |
| def OlympusUnitM0 : ProcResource<1>; // Integer single/multicycle 0 |
| def OlympusUnitM1 : ProcResource<1>; // Integer single/multicycle 1 |
| def OlympusUnitL : ProcResource<4>; // Load 0/1/2/3 |
| def OlympusUnitSA : ProcResource<2>; // Store 0/1 |
| def OlympusUnitD : ProcResource<2>; // Integer store data 0/1 |
| def OlympusUnitV0 : ProcResource<1>; // FP/ASIMD 0 |
| def OlympusUnitV1 : ProcResource<1>; // FP/ASIMD 1 |
| def OlympusUnitV2 : ProcResource<1>; // FP/ASIMD 2 |
| def OlympusUnitV3 : ProcResource<1>; // FP/ASIMD 3 |
| def OlympusUnitV45 : ProcResource<2>; // FP/ASIMD 4/5 |
| def OlympusUnitF : ProcResource<6>; // Flags (artificial pipes, see OlympusWrite_1c_1F) |
| |
| // Resource groups: each ProcResGroup gives one name to several of the issue |
| // ports defined above. The trailing comments list the pipes each group spans. |
| def OlympusUnitM : ProcResGroup<[OlympusUnitM0, OlympusUnitM1]>; // Integer single/multicycle 0/1 |
| def OlympusUnitI : ProcResGroup<[OlympusUnitS, OlympusUnitM0, OlympusUnitM1]>; // Integer single-cycle 0/1/2/3/4/5 and single/multicycle 0/1 |
| def OlympusUnitV03 : ProcResGroup<[OlympusUnitV0, OlympusUnitV3]>; // FP/ASIMD 0/3 |
| def OlympusUnitV12 : ProcResGroup<[OlympusUnitV1, OlympusUnitV2]>; // FP/ASIMD 1/2 |
| def OlympusUnitV0123 : ProcResGroup<[OlympusUnitV0, OlympusUnitV1, OlympusUnitV2, OlympusUnitV3]>; // FP/ASIMD 0/1/2/3 (also used for vector store data) |
| def OlympusUnitV : ProcResGroup<[OlympusUnitV0, OlympusUnitV1, OlympusUnitV2, OlympusUnitV3, OlympusUnitV45]>; // FP/ASIMD 0/1/2/3/4/5 |
| |
| // No forwarding is provided for these types: every read in the list below is |
| // given a ReadAdvance of zero cycles. |
| foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID, |
|               ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in |
|   def : ReadAdvance<Rd, 0>; |
| |
| // Writes mapped to an empty resource list. WriteAtomic is flagged Unsupported; |
| // the others only carry a latency. |
| def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } |
| let Latency = 1 in { |
|   def : WriteRes<WriteBarrier, []>; |
|   def : WriteRes<WriteHint, []>; |
| } // Latency = 1 |
| def : WriteRes<WriteLDHi, []> { let Latency = 4; } |
| |
| //===----------------------------------------------------------------------===// |
| // Define customized scheduler read/write types specific to the Olympus. |
| |
| // Define generic 0 micro-op types |
| // Both records use no resources and report zero micro-ops; they differ only |
| // in latency. |
| |
| def OlympusWrite_0c : SchedWriteRes<[]> { |
|   let Latency = 0; |
|   let NumMicroOps = 0; |
| } |
| def OlympusWrite_6c : SchedWriteRes<[]> { |
|   let Latency = 6; |
|   let NumMicroOps = 0; |
| } |
| |
| // Define generic 1 micro-op types |
| // Record names follow OlympusWrite_<latency>c_1<unit>. Records sharing a |
| // latency are wrapped in a common `let Latency = N in` block. |
| |
| let Latency = 1 in { |
|   def OlympusWrite_1c_1B : SchedWriteRes<[OlympusUnitB]>; |
|   def OlympusWrite_1c_1I : SchedWriteRes<[OlympusUnitI]>; |
|   def OlympusWrite_1c_1M : SchedWriteRes<[OlympusUnitM]>; |
|   def OlympusWrite_1c_1M0 : SchedWriteRes<[OlympusUnitM0]>; |
|   def OlympusWrite_1c_1L : SchedWriteRes<[OlympusUnitL]>; |
| } // Latency = 1 |
| |
| let Latency = 2 in { |
|   def OlympusWrite_2c_1M : SchedWriteRes<[OlympusUnitM]>; |
|   def OlympusWrite_2c_1M0 : SchedWriteRes<[OlympusUnitM0]>; |
|   def OlympusWrite_2c_1V : SchedWriteRes<[OlympusUnitV]>; |
|   def OlympusWrite_2c_1V0 : SchedWriteRes<[OlympusUnitV0]>; |
|   def OlympusWrite_2c_1V0123 : SchedWriteRes<[OlympusUnitV0123]>; |
|   def OlympusWrite_2c_1V03 : SchedWriteRes<[OlympusUnitV03]>; |
|   def OlympusWrite_2c_1V1 : SchedWriteRes<[OlympusUnitV1]>; |
| } // Latency = 2 |
| |
| let Latency = 3 in { |
|   def OlympusWrite_3c_1M : SchedWriteRes<[OlympusUnitM]>; |
|   def OlympusWrite_3c_1M0 : SchedWriteRes<[OlympusUnitM0]>; |
|   def OlympusWrite_3c_1V : SchedWriteRes<[OlympusUnitV]>; |
|   def OlympusWrite_3c_1V0 : SchedWriteRes<[OlympusUnitV0]>; |
|   def OlympusWrite_3c_1V0123 : SchedWriteRes<[OlympusUnitV0123]>; |
|   def OlympusWrite_3c_1V03 : SchedWriteRes<[OlympusUnitV03]>; |
|   def OlympusWrite_3c_1V1 : SchedWriteRes<[OlympusUnitV1]>; |
|   def OlympusWrite_3c_1V12 : SchedWriteRes<[OlympusUnitV12]>; |
| } // Latency = 3 |
| |
| let Latency = 4 in { |
|   def OlympusWrite_4c_1M0 : SchedWriteRes<[OlympusUnitM0]>; |
|   def OlympusWrite_4c_1L : SchedWriteRes<[OlympusUnitL]>; |
|   def OlympusWrite_4c_1V : SchedWriteRes<[OlympusUnitV]>; |
|   def OlympusWrite_4c_1V0 : SchedWriteRes<[OlympusUnitV0]>; |
|   def OlympusWrite_4c_1V0123 : SchedWriteRes<[OlympusUnitV0123]>; |
|   def OlympusWrite_4c_1V03 : SchedWriteRes<[OlympusUnitV03]>; |
|   def OlympusWrite_4c_1V1 : SchedWriteRes<[OlympusUnitV1]>; |
| } // Latency = 4 |
| |
| def OlympusWrite_5c_1V : SchedWriteRes<[OlympusUnitV]> { let Latency = 5; } |
| |
| let Latency = 6 in { |
|   def OlympusWrite_6c_1L : SchedWriteRes<[OlympusUnitL]>; |
|   def OlympusWrite_6c_1V : SchedWriteRes<[OlympusUnitV]>; |
|   def OlympusWrite_6c_1V1 : SchedWriteRes<[OlympusUnitV1]>; |
|   def OlympusWrite_6c_1V12 : SchedWriteRes<[OlympusUnitV12]>; |
| } // Latency = 6 |
| |
| // ReleaseAtCycles equals the latency here, i.e. the pipe is blocked for the |
| // whole 7 cycles. |
| def OlympusWrite_7c_1V0 : SchedWriteRes<[OlympusUnitV0]> { |
|   let Latency = 7; |
|   let ReleaseAtCycles = [7]; |
| } |
| def OlympusWrite_8c_1V12 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 8; } |
| def OlympusWrite_9c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 9; } |
| |
| let Latency = 10 in { |
|   def OlympusWrite_10c_1V0 : SchedWriteRes<[OlympusUnitV0]>; |
|   def OlympusWrite_10c_1V1 : SchedWriteRes<[OlympusUnitV1]>; |
| } // Latency = 10 |
| |
| // The M and V45 variants below block their pipe for all 12 cycles; the V |
| // variant sets no ReleaseAtCycles. |
| let Latency = 12 in { |
|   def OlympusWrite_12c_1M : SchedWriteRes<[OlympusUnitM]> { let ReleaseAtCycles = [12]; } |
|   def OlympusWrite_12c_1V : SchedWriteRes<[OlympusUnitV]>; |
|   def OlympusWrite_12c_1V45 : SchedWriteRes<[OlympusUnitV45]> { let ReleaseAtCycles = [12]; } |
| } // Latency = 12 |
| |
| let Latency = 13 in { |
|   def OlympusWrite_13c_1V0 : SchedWriteRes<[OlympusUnitV0]>; |
|   def OlympusWrite_13c_1V12 : SchedWriteRes<[OlympusUnitV12]>; |
| } // Latency = 13 |
| |
| def OlympusWrite_15c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 15; } |
| def OlympusWrite_16c_1V0 : SchedWriteRes<[OlympusUnitV0]> { let Latency = 16; } |
| |
| // Both 20-cycle records block their pipe for the full latency. |
| let Latency = 20, ReleaseAtCycles = [20] in { |
|   def OlympusWrite_20c_1M : SchedWriteRes<[OlympusUnitM]>; |
|   def OlympusWrite_20c_1V45 : SchedWriteRes<[OlympusUnitV45]>; |
| } // Latency = 20 |
| |
| // These types are multi-pumped, which we model by blocking the pipes for a |
| // number of cycles (c.f. §3.1.3). |
| // Naming: OlympusWrite_<latency>c_1<unit>_<N>, where the trailing <N> is the |
| // ReleaseAtCycles value, i.e. how many cycles the pipe is held. |
| def OlympusWrite_4c_1V0123_2 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 4; let ReleaseAtCycles = [2]; } |
| def OlympusWrite_5c_1V0123_2 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 5; let ReleaseAtCycles = [2]; } |
| def OlympusWrite_6c_1V0123_4 : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 6; let ReleaseAtCycles = [4]; } |
| def OlympusWrite_7c_1V03_6 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 7; let ReleaseAtCycles = [6]; } |
| def OlympusWrite_9c_1V12_2 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 9; let ReleaseAtCycles = [2]; } |
| def OlympusWrite_9c_1V12_4 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 9; let ReleaseAtCycles = [4]; } |
| def OlympusWrite_11c_1V03_10 : SchedWriteRes<[OlympusUnitV03]> { let Latency = 11; let ReleaseAtCycles = [10]; } |
| def OlympusWrite_11c_1V12_4 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 11; let ReleaseAtCycles = [4]; } |
| def OlympusWrite_13c_1V12_8 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 13; let ReleaseAtCycles = [8]; } |
| def OlympusWrite_14c_1V12_2 : SchedWriteRes<[OlympusUnitV12]> { let Latency = 14; let ReleaseAtCycles = [2]; } |
| |
| // Flag-setting throughput workaround. Basic flagset instructions issue on the |
| // I pipes but only sustain a throughput of six, whereas the eight I pipes |
| // alone would imply eight. Also consuming one of the six artificial F pipes |
| // caps the throughput at the correct value. |
| def OlympusWrite_1c_1F : SchedWriteRes<[OlympusUnitI, OlympusUnitF]> { |
|   let Latency = 1; |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 2 micro-op types |
| // Records are grouped by latency; the resource list of each record is kept |
| // exactly as written, even where its order differs from the record name. |
| |
| let NumMicroOps = 2 in { |
| |
| let Latency = 1 in { |
|   def OlympusWrite_1c_1I_1B : SchedWriteRes<[OlympusUnitI, OlympusUnitB]>; |
|   def OlympusWrite_1c_1M0_1M : SchedWriteRes<[OlympusUnitM0, OlympusUnitM]>; |
|   def OlympusWrite_1c_1SA_1D : SchedWriteRes<[OlympusUnitSA, OlympusUnitD]>; |
|   def OlympusWrite_1c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]>; |
| } // Latency = 1 |
| |
| let Latency = 2 in { |
|   def OlympusWrite_2c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]>; |
|   def OlympusWrite_2c_1M0_1M : SchedWriteRes<[OlympusUnitM0, OlympusUnitM]>; |
|   def OlympusWrite_2c_1M_1V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitM]>; |
|   def OlympusWrite_2c_1V0_1M : SchedWriteRes<[OlympusUnitV0, OlympusUnitM]>; |
| } // Latency = 2 |
| |
| let Latency = 3 in { |
|   def OlympusWrite_3c_1I_1M : SchedWriteRes<[OlympusUnitI, OlympusUnitM]>; |
|   def OlympusWrite_3c_1M_1M0 : SchedWriteRes<[OlympusUnitM, OlympusUnitM0]>; |
|   def OlympusWrite_3c_1M_1V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitM]>; |
|   def OlympusWrite_3c_1SA_1V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitV0123]>; |
|   def OlympusWrite_3c_1V0_1M : SchedWriteRes<[OlympusUnitV0, OlympusUnitM]>; |
|   def OlympusWrite_3c_2V03 : SchedWriteRes<[OlympusUnitV03, OlympusUnitV03]>; |
|   def OlympusWrite_3c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]>; |
| } // Latency = 3 |
| |
| let Latency = 4 in { |
|   def OlympusWrite_4c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]>; |
|   def OlympusWrite_4c_1M_1M0 : SchedWriteRes<[OlympusUnitM, OlympusUnitM0]>; |
|   def OlympusWrite_4c_1SA_1D : SchedWriteRes<[OlympusUnitSA, OlympusUnitD]>; |
|   def OlympusWrite_4c_2M : SchedWriteRes<[OlympusUnitM, OlympusUnitM]>; |
|   def OlympusWrite_4c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_4c_2V0 : SchedWriteRes<[OlympusUnitV0, OlympusUnitV0]>; |
| } // Latency = 4 |
| |
| let Latency = 5 in { |
|   def OlympusWrite_5c_1B_1M0 : SchedWriteRes<[OlympusUnitB, OlympusUnitM0]>; |
|   def OlympusWrite_5c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]>; |
|   def OlympusWrite_5c_1L_1F : SchedWriteRes<[OlympusUnitL, OlympusUnitF]>; |
|   def OlympusWrite_5c_1M0_1V : SchedWriteRes<[OlympusUnitM0, OlympusUnitV]>; |
|   def OlympusWrite_5c_1M_1L : SchedWriteRes<[OlympusUnitM, OlympusUnitL]>; |
|   def OlympusWrite_5c_1M_1V : SchedWriteRes<[OlympusUnitM, OlympusUnitV]>; |
|   def OlympusWrite_5c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_5c_2V0 : SchedWriteRes<[OlympusUnitV0, OlympusUnitV0]>; |
|   def OlympusWrite_5c_1V_1V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV0123]>; |
| } // Latency = 5 |
| |
| let Latency = 6 in { |
|   def OlympusWrite_6c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]>; |
|   def OlympusWrite_6c_1L_1S : SchedWriteRes<[OlympusUnitL, OlympusUnitS]>; |
|   def OlympusWrite_6c_1V03_1V12 : SchedWriteRes<[OlympusUnitV03, OlympusUnitV12]>; |
|   def OlympusWrite_6c_1V1_1M0 : SchedWriteRes<[OlympusUnitV1, OlympusUnitM0]>; |
|   def OlympusWrite_6c_1V_1V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV0123]>; |
|   def OlympusWrite_6c_2L : SchedWriteRes<[OlympusUnitL, OlympusUnitL]>; |
|   def OlympusWrite_6c_2V : SchedWriteRes<[OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_6c_2V12 : SchedWriteRes<[OlympusUnitV12, OlympusUnitV12]>; |
|   def OlympusWrite_6c_2V0123 : SchedWriteRes<[OlympusUnitV0123, OlympusUnitV0123]>; |
| } // Latency = 6 |
| |
| let Latency = 7 in { |
|   def OlympusWrite_7c_1F_1L : SchedWriteRes<[OlympusUnitF, OlympusUnitL]>; |
|   def OlympusWrite_7c_1I_1L : SchedWriteRes<[OlympusUnitI, OlympusUnitL]>; |
|   def OlympusWrite_7c_1M_1V0123 : SchedWriteRes<[OlympusUnitM, OlympusUnitV0123]>; |
| } // Latency = 7 |
| |
| let Latency = 8 in { |
|   def OlympusWrite_8c_1L_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitV]>; |
|   def OlympusWrite_8c_1M0_1L : SchedWriteRes<[OlympusUnitM0, OlympusUnitL]>; |
| |
|   // These types are multi-pumped. |
|   def OlympusWrite_8c_1M_1V0123_2 : SchedWriteRes<[OlympusUnitM, OlympusUnitV0123]> { |
|     let ReleaseAtCycles = [1, 2]; |
|   } |
| } // Latency = 8 |
| |
| } // NumMicroOps = 2 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 3 micro-op types |
| // Records are grouped by latency where two or more share one. |
| |
| let NumMicroOps = 3 in { |
| |
| let Latency = 2 in { |
|   def OlympusWrite_2c_1L_1S_1V : SchedWriteRes<[OlympusUnitL, OlympusUnitS, OlympusUnitV]>; |
|   def OlympusWrite_2c_1SA_1D_1I : SchedWriteRes<[OlympusUnitSA, OlympusUnitD, OlympusUnitI]>; |
| } // Latency = 2 |
| |
| def OlympusWrite_3c_1I_1SA_1V0123 : SchedWriteRes<[OlympusUnitI, OlympusUnitSA, OlympusUnitV0123]> { |
|   let Latency = 3; |
| } |
| def OlympusWrite_4c_3V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, OlympusUnitV]> { |
|   let Latency = 4; |
| } |
| |
| let Latency = 5 in { |
|   def OlympusWrite_5c_1I_1B_1M0 : SchedWriteRes<[OlympusUnitI, OlympusUnitB, OlympusUnitM0]>; |
|   def OlympusWrite_5c_1SA_1V_1V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitV, OlympusUnitV0123]>; |
| } // Latency = 5 |
| |
| let Latency = 6 in { |
|   def OlympusWrite_6c_3L : SchedWriteRes<[OlympusUnitL, OlympusUnitL, OlympusUnitL]>; |
|   def OlympusWrite_6c_3V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, OlympusUnitV]>; |
| } // Latency = 6 |
| |
| def OlympusWrite_7c_2M_1V : SchedWriteRes<[OlympusUnitM, OlympusUnitM, OlympusUnitV]> { |
|   let Latency = 7; |
| } |
| |
| let Latency = 8 in { |
|   def OlympusWrite_8c_1L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_8c_1M_1V03_1V12 : SchedWriteRes<[OlympusUnitM, OlympusUnitV03, OlympusUnitV12]>; |
|   def OlympusWrite_8c_2V_1V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV, OlympusUnitV0123]>; |
| } // Latency = 8 |
| |
| let Latency = 9 in { |
|   def OlympusWrite_9c_1L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_9c_2L_1V03 : SchedWriteRes<[OlympusUnitL, OlympusUnitL, OlympusUnitV03]>; |
|   def OlympusWrite_9c_1V_2V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV0123, OlympusUnitV0123]>; |
| } // Latency = 9 |
| |
| } // NumMicroOps = 3 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 4 micro-op types |
| // Records are grouped by latency where two or more share one. |
| |
| let NumMicroOps = 4 in { |
| |
| def OlympusWrite_2c_2L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                            OlympusUnitV, OlympusUnitV]> { |
|   let Latency = 2; |
| } |
| def OlympusWrite_3c_2SA_2V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitSA, |
|                                                 OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 3; |
| } |
| |
| let Latency = 4 in { |
|   def OlympusWrite_4c_2L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                              OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_4c_2SA_2V0123 : SchedWriteRes<[OlympusUnitSA, OlympusUnitSA, |
|                                                   OlympusUnitV0123, OlympusUnitV0123]>; |
| } // Latency = 4 |
| |
| def OlympusWrite_5c_1I_3L : SchedWriteRes<[OlympusUnitI, |
|                                            OlympusUnitL, OlympusUnitL, OlympusUnitL]> { |
|   let Latency = 5; |
| } |
| |
| let Latency = 6 in { |
|   def OlympusWrite_6c_2I_2L : SchedWriteRes<[OlympusUnitI, OlympusUnitI, |
|                                              OlympusUnitL, OlympusUnitL]>; |
|   def OlympusWrite_6c_4L : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                           OlympusUnitL, OlympusUnitL]>; |
|   def OlympusWrite_6c_4V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, |
|                                           OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_6c_4V0 : SchedWriteRes<[OlympusUnitV0, OlympusUnitV0, |
|                                            OlympusUnitV0, OlympusUnitV0]>; |
| } // Latency = 6 |
| |
| def OlympusWrite_7c_1SA_2V_1V0123 : SchedWriteRes<[OlympusUnitSA, |
|                                                    OlympusUnitV, OlympusUnitV, |
|                                                    OlympusUnitV0123]> { |
|   let Latency = 7; |
| } |
| |
| let Latency = 8 in { |
|   def OlympusWrite_8c_2L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                              OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_8c_2V_2V1 : SchedWriteRes<[OlympusUnitV, OlympusUnitV, |
|                                               OlympusUnitV1, OlympusUnitV1]>; |
|   def OlympusWrite_8c_4V : SchedWriteRes<[OlympusUnitV, OlympusUnitV, |
|                                           OlympusUnitV, OlympusUnitV]>; |
| } // Latency = 8 |
| |
| let Latency = 9 in { |
|   def OlympusWrite_9c_2L_2V : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                              OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_9c_2L_2V1 : SchedWriteRes<[OlympusUnitL, OlympusUnitL, |
|                                               OlympusUnitV1, OlympusUnitV1]>; |
|   def OlympusWrite_9c_2V_2V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV, |
|                                                  OlympusUnitV0123, OlympusUnitV0123]>; |
| } // Latency = 9 |
| |
| def OlympusWrite_11c_2V_2V0123 : SchedWriteRes<[OlympusUnitV, OlympusUnitV, |
|                                                 OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 11; |
| } |
| |
| } // NumMicroOps = 4 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 5 micro-op types |
| // Each resource list is laid out with one unit kind per line. |
| |
| let NumMicroOps = 5 in { |
| |
| def OlympusWrite_4c_2SA_1V03_2V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV03, |
|     OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 4; } |
| |
| def OlympusWrite_6c_5V : SchedWriteRes<[ |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { |
|   let Latency = 6; |
| } |
| |
| let Latency = 8 in { |
|   def OlympusWrite_8c_1L_4V : SchedWriteRes<[ |
|       OlympusUnitL, |
|       OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_8c_2L_3V : SchedWriteRes<[ |
|       OlympusUnitL, OlympusUnitL, |
|       OlympusUnitV, OlympusUnitV, OlympusUnitV]>; |
| } // Latency = 8 |
| |
| let Latency = 9 in { |
|   def OlympusWrite_9c_1I_2L_2V : SchedWriteRes<[ |
|       OlympusUnitI, |
|       OlympusUnitL, OlympusUnitL, |
|       OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_9c_4L_1V03 : SchedWriteRes<[ |
|       OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|       OlympusUnitV03]>; |
| } // Latency = 9 |
| |
| def OlympusWrite_10c_1L_4V : SchedWriteRes<[ |
|     OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 10; } |
| |
| } // NumMicroOps = 5 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 6 micro-op types |
| // Each resource list is laid out with one unit kind per line. |
| |
| let NumMicroOps = 6 in { |
| |
| def OlympusWrite_2c_3L_3V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 2; } |
| |
| def OlympusWrite_4c_3SA_3V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 4; } |
| |
| def OlympusWrite_5c_2SA_2V_2V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 5; } |
| |
| let Latency = 8 in { |
|   def OlympusWrite_8c_2L_4V : SchedWriteRes<[ |
|       OlympusUnitL, OlympusUnitL, |
|       OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]>; |
|   def OlympusWrite_8c_3L_3V : SchedWriteRes<[ |
|       OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|       OlympusUnitV, OlympusUnitV, OlympusUnitV]>; |
| } // Latency = 8 |
| |
| def OlympusWrite_9c_4L_2V03 : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV03, OlympusUnitV03]> { let Latency = 9; } |
| |
| } // NumMicroOps = 6 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 7 micro-op types |
| // NumMicroOps is hoisted into an enclosing let, as in the smaller groups; each |
| // resource list is laid out with one unit kind per line. |
| |
| let NumMicroOps = 7 in { |
| |
| def OlympusWrite_5c_1I_2SA_2V_2V0123 : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 5; } |
| |
| def OlympusWrite_8c_3L_4V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 8; } |
| |
| def OlympusWrite_9c_1I_3L_3V : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 9; } |
| |
| } // NumMicroOps = 7 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 8 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; each resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 8 in { |
| |
| def OlympusWrite_2c_4L_4V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 2; } |
| |
| def OlympusWrite_4c_4SA_4V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 4; |
| } |
| |
| def OlympusWrite_7c_2SA_4V_2V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 7; } |
| |
| def OlympusWrite_8c_4L_4V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 8; } |
| |
| } // NumMicroOps = 8 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 9 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; each resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 9 in { |
| |
| def OlympusWrite_5c_4SA_1V03_4V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV03, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 5; |
| } |
| |
| def OlympusWrite_6c_3SA_3V_3V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 6; } |
| |
| def OlympusWrite_10c_1L_8V : SchedWriteRes<[ |
|     OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 10; } |
| |
| } // NumMicroOps = 9 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 10 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; each resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 10 in { |
| |
| def OlympusWrite_5c_4SA_2V03_4V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV03, OlympusUnitV03, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 5; |
| } |
| |
| def OlympusWrite_6c_1I_3SA_3V_3V0123 : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { let Latency = 6; } |
| |
| } // NumMicroOps = 10 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 12 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; each resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 12 in { |
| |
| def OlympusWrite_6c_4SA_4V_4V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 6; |
| } |
| |
| def OlympusWrite_9c_4L_8V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 9; } |
| |
| def OlympusWrite_10c_4L_8V : SchedWriteRes<[ |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 10; } |
| |
| } // NumMicroOps = 12 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 13 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; each resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 13 in { |
| |
| def OlympusWrite_6c_1I_4SA_4V_4V0123 : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 6; |
| } |
| |
| def OlympusWrite_10c_1I_4L_8V : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitL, OlympusUnitL, OlympusUnitL, OlympusUnitL, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV]> { let Latency = 10; } |
| |
| } // NumMicroOps = 13 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 16 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; the resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 16 in { |
| |
| def OlympusWrite_8c_4SA_8V_4V0123 : SchedWriteRes<[ |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 8; |
| } |
| |
| } // NumMicroOps = 16 |
| |
| //===----------------------------------------------------------------------===// |
| // Define generic 17 micro-op types |
| // NumMicroOps is hoisted into an enclosing let; the resource list is laid out |
| // with one unit kind per line. |
| |
| let NumMicroOps = 17 in { |
| |
| def OlympusWrite_8c_1I_4SA_8V_4V0123 : SchedWriteRes<[ |
|     OlympusUnitI, |
|     OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, OlympusUnitSA, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV, OlympusUnitV, OlympusUnitV, OlympusUnitV, |
|     OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 8; |
| } |
| |
| } // NumMicroOps = 17 |
| |
| //===----------------------------------------------------------------------===// |
| // Define predicates |
| |
| // Matches the scalar SVE INC/DEC opcodes listed below when immediate operand 2 |
| // is 31 and immediate operand 3 is one of 1, 2, 4 or 8 (the "ALL, {1, 2, 4, 8}" |
| // case). |
| def OlympusIsCheapIncDec : MCSchedPredicate<CheckAll<[ |
|   CheckOpcode<[INCB_XPiI, INCH_XPiI, INCW_XPiI, INCD_XPiI, |
|                DECB_XPiI, DECH_XPiI, DECW_XPiI, DECD_XPiI]>, |
|   CheckImmOperand<2, 31>, |
|   CheckAny<[CheckImmOperand<3, 1>, CheckImmOperand<3, 2>, |
|             CheckImmOperand<3, 4>, CheckImmOperand<3, 8>]> |
| ]>>; |
| |
| // Matches EXTRWrri/EXTRXrri when register operands 1 and 2 are the same |
| // (EXTR Rd, Rs, Rs, #Imm, i.e. the ROR alias) or when immediate operand 3 is 0. |
| def OlympusIsCheapExtr : MCSchedPredicate<CheckAll<[ |
|   CheckOpcode<[EXTRWrri, EXTRXrri]>, |
|   CheckAny<[CheckSameRegOperand<1, 2>, CheckImmOperand<3, 0>]> |
| ]>>; |
| |
| // Matches any logic shift op, or an arithmetic shift op that uses LSL with a |
| // shift amount of 0 through 4. |
| def OlympusIsCheapShift : MCSchedPredicate<CheckAny<[ |
|   IsLogicShiftOp, |
|   CheckAll<[ |
|     IsArithShiftOp, |
|     CheckShiftLSL, |
|     CheckAny<[CheckShiftBy0, CheckShiftBy1, CheckShiftBy2, CheckShiftBy3, |
|               CheckShiftBy4]> |
|   ]> |
| ]>>; |
| |
| // Matches an arithmetic extend op whose extend kind is UXTW, UXTX, SXTW or |
| // SXTX (i.e. extending from W/X). |
| def OlympusIsCheapExtend : MCSchedPredicate<CheckAll<[ |
|   IsArithExtOp, |
|   CheckAny<[CheckExtUXTW, CheckExtUXTX, CheckExtSXTW, CheckExtSXTX]> |
| ]>>; |
| |
| // Check if STRH has scaled offset (shift amount of 1) |
| // NOTE(review): the predicate itself only tests that immediate operand 4 equals |
| // 1; it carries no opcode check, so it presumably relies on being attached only |
| // to register-offset STRH forms -- confirm at the use sites. |
| def OlympusIsScaledSTRH : MCSchedPredicate<CheckImmOperand<4, 1>>; |
| |
| //===----------------------------------------------------------------------===// |
| // Define predicate-controlled types |
| // Each SchedWriteVariant lists a predicated SchedVar first and a NoSchedPred |
| // fallback second. Predicates prefixed "Neoverse" are defined outside this |
| // section. |
| |
| // Scaled STRH: 3 micro-ops (SA + D + I) at 2 cycles; otherwise SA + D at 1 cycle. |
| def OlympusWrite_ScaledSTRH : SchedWriteVariant<[ |
| SchedVar<OlympusIsScaledSTRH, [OlympusWrite_2c_1SA_1D_1I]>, |
| SchedVar<NoSchedPred, [OlympusWrite_1c_1SA_1D]>]>; |
| |
| // Cheap shift: 1 cycle on an I pipe; otherwise 2 cycles on an M pipe. |
| def OlympusWrite_ShiftI : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapShift, [OlympusWrite_1c_1I]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| // As OlympusWrite_ShiftI, but the cheap case uses the I + F (flagset) write. |
| def OlympusWrite_ShiftF : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapShift, [OlympusWrite_1c_1F]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| // Cheap extend: 1 cycle on an I pipe; otherwise 2 cycles on an M pipe. |
| def OlympusWrite_ExtendI : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapExtend, [OlympusWrite_1c_1I]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| // As OlympusWrite_ExtendI, but the cheap case uses the I + F (flagset) write. |
| def OlympusWrite_ExtendF : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapExtend, [OlympusWrite_1c_1F]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| // NeoverseNoLSL selects the 1-cycle I + F write; otherwise 2 cycles on an M pipe. |
| def OlympusWrite_Logical : SchedWriteVariant<[ |
| SchedVar<NeoverseNoLSL, [OlympusWrite_1c_1F]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| // Cheap EXTR: 1 cycle on an I pipe; otherwise 2 micro-ops (I + M) at 3 cycles. |
| def OlympusWrite_Extr : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapExtr, [OlympusWrite_1c_1I]>, |
| SchedVar<NoSchedPred, [OlympusWrite_3c_1I_1M]>]>; |
| |
| // The next three use the zero-latency, zero-micro-op write when |
| // NeoverseZeroMove matches, and the write named in the suffix otherwise. |
| def OlympusWrite_0or1c_1I : SchedWriteVariant<[ |
| SchedVar<NeoverseZeroMove, [OlympusWrite_0c]>, |
| SchedVar<NoSchedPred, [OlympusWrite_1c_1I]>]>; |
| |
| def OlympusWrite_0or2c_1V : SchedWriteVariant<[ |
| SchedVar<NeoverseZeroMove, [OlympusWrite_0c]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1V]>]>; |
| |
| def OlympusWrite_0or3c_1M : SchedWriteVariant<[ |
| SchedVar<NeoverseZeroMove, [OlympusWrite_0c]>, |
| SchedVar<NoSchedPred, [OlympusWrite_3c_1M]>]>; |
| |
| // NeoversePdIsPg selects the 2-micro-op M + V03 write; otherwise a single V03 |
| // micro-op. Both are 2 cycles. |
| def OlympusWrite_2c_1V03_or_1M_1V03 : SchedWriteVariant<[ |
| SchedVar<NeoversePdIsPg, [OlympusWrite_2c_1M_1V03]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1V03]>]>; |
| |
| // Cheap INC/DEC: 1 cycle on an I pipe; otherwise 2 cycles on an M pipe. |
| def OlympusWrite_IncDec : SchedWriteVariant<[ |
| SchedVar<OlympusIsCheapIncDec, [OlympusWrite_1c_1I]>, |
| SchedVar<NoSchedPred, [OlympusWrite_2c_1M]>]>; |
| |
| //===----------------------------------------------------------------------===// |
| // Define forwarded types |
| // Each pair below is a write on the V group with the stated latency, plus a |
| // SchedReadAdvance that applies only to that same write. |
| // NOTE(review): presumably a consumer using the paired read sees the write's |
| // latency reduced by the advance (e.g. 4 - 2 = 2) -- confirm against the |
| // SchedReadAdvance definition in TargetSchedule.td. |
| |
| // Latency 4, advance 2. |
| def OlympusWr_FRS : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_FRS : SchedReadAdvance<2, [OlympusWr_FRS]>; |
| |
| // Latency 4, advance 2. |
| def OlympusWr_VA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_VA : SchedReadAdvance<2, [OlympusWr_VA]>; |
| |
| // Latency 4, advance 2. |
| def OlympusWr_VPA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_VPA : SchedReadAdvance<2, [OlympusWr_VPA]>; |
| |
| // Latency 4, advance 2. |
| def OlympusWr_VSA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_VSA : SchedReadAdvance<2, [OlympusWr_VSA]>; |
| |
| // Latency 3, advance 1. |
| def OlympusWr_VDOT : SchedWriteRes<[OlympusUnitV]> { let Latency = 3; } |
| def OlympusRd_VDOT : SchedReadAdvance<1, [OlympusWr_VDOT]>; |
| |
| // Latency 3, advance 1. |
| def OlympusWr_VMMA : SchedWriteRes<[OlympusUnitV]> { let Latency = 3; } |
| def OlympusRd_VMMA : SchedReadAdvance<1, [OlympusWr_VMMA]>; |
| |
| // Latency 4, advance 2. |
| def OlympusWr_VFCMA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_VFCMA : SchedReadAdvance<2, [OlympusWr_VFCMA]>; |
| |
| // Latency 4, advance 2. |
| def OlympusWr_VFMAL : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_VFMAL : SchedReadAdvance<2, [OlympusWr_VFMAL]>; |
| |
| // The three ZBF* pairs have latencies 4, 5 and 6; all use an advance of 2. |
| def OlympusWr_ZBFMAL : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_ZBFMAL : SchedReadAdvance<2, [OlympusWr_ZBFMAL]>; |
| def OlympusWr_ZBFDOT : SchedWriteRes<[OlympusUnitV]> { let Latency = 5; } |
| def OlympusRd_ZBFDOT : SchedReadAdvance<2, [OlympusWr_ZBFDOT]>; |
| def OlympusWr_ZBFMMA : SchedWriteRes<[OlympusUnitV]> { let Latency = 6; } |
| def OlympusRd_ZBFMMA : SchedReadAdvance<2, [OlympusWr_ZBFMMA]>; |
| |
| // Miscellaneous |
| // ----------------------------------------------------------------------------- |
| |
| def : InstRW<[WriteI], (instrs COPY)>; |
| |
| // 3.2 Branch instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Branch, simple |
| // Compare and branch |
| def : SchedAlias<WriteBr, OlympusWrite_1c_1B>; |
| def : SchedAlias<WriteBrReg, OlympusWrite_1c_1B>; |
| |
| // Branch and link |
| def : InstRW<[OlympusWrite_1c_1I_1B], (instrs BL, BLR)>; |
| |
| // 3.3 Arithmetic and logical instructions |
| // ----------------------------------------------------------------------------- |
| |
| // ALU/Logical basic |
| def : SchedAlias<WriteI, OlympusWrite_1c_1I>; |
| def : SchedAlias<WriteImm, OlympusWrite_1c_1I>; |
| def : InstRW<[OlympusWrite_1c_1I], (instrs GMI, SUBP)>; |
| def : InstRW<[OlympusWrite_0or1c_1I], (instregex "^ORR[WX]rs$")>; |
| |
| // ALU/Logical basic, flag write |
| // Flag-setting forms mapped to OlympusWrite_1c_1F: ADDS/ANDS/SUBS with an |
| // immediate, ADCS/SBCS, and SUBPS. |
| def : InstRW<[OlympusWrite_1c_1F], |
|   (instregex |
|     "^(ADD|AND|SUB)S[WX]ri$", |
|     "^(ADC|SBC)S[WX]r$", |
|     "^SUBPS$")>; |
| |
| // ALU, src extend from H/B |
| def : SchedAlias<WriteIEReg, OlympusWrite_ExtendI>; |
| def : InstRW<[OlympusWrite_ExtendF], (instregex "^(ADD|SUB)S[WX]rx")>; |
| |
| // ALU, src LSL shift > 4 or LSR/ASR/ROR shift |
| def : SchedAlias<WriteISReg, OlympusWrite_ShiftI>; |
| def : InstRW<[OlympusWrite_ShiftF], (instregex "^(ADD|SUB)S[WX]rs$")>; |
| |
| // Logical, flag write, src shift |
| def : InstRW<[OlympusWrite_Logical], (instregex "^(AND|BIC)S[WX]rs$")>; |
| |
| // Conditional compare |
| def : InstRW<[OlympusWrite_1c_1F], (instregex "^CCM[NP][WX][ir]$")>; |
| |
| // Flag manipulation |
| def : SchedAlias<WriteSys, OlympusWrite_1c_1F>; |
| def : InstRW<[OlympusWrite_1c_1F], (instrs SETF8, SETF16, RMIF)>; |
| |
| // Arithmetic to tag |
| def : InstRW<[OlympusWrite_2c_1M], (instrs ADDG, SUBG)>; |
| |
| // Insert Random Tag |
| def : InstRW<[OlympusWrite_2c_1M], (instrs IRG, IRGstack)>; |
| |
| // 3.4 Divide and multiply instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Divide |
| def : SchedAlias<WriteID32, OlympusWrite_12c_1M>; |
| def : SchedAlias<WriteID64, OlympusWrite_20c_1M>; |
| |
| // Multiply accumulate |
| def : SchedAlias<WriteIM32, OlympusWrite_2c_1M>; |
| def : SchedAlias<WriteIM64, OlympusWrite_2c_1M>; |
| // Both multiply writes are 2-cycle operations on OlympusUnitM; they are kept |
| // as distinct types so that OlympusWr_IMX can select between them. |
| def OlympusWr_IM : SchedWriteRes<[OlympusUnitM]> { let Latency = 2; } |
| def OlympusWr_IMA : SchedWriteRes<[OlympusUnitM]> { let Latency = 2; } |
| // Resolves to OlympusWr_IM when IsReg3ZeroPred matches, else OlympusWr_IMA. |
| // NOTE(review): IsReg3ZeroPred presumably identifies the MUL-style aliases |
| // whose accumulator operand is the zero register -- confirm against its |
| // definition. |
| def OlympusWr_IMX : SchedWriteVariant<[ |
| SchedVar<IsReg3ZeroPred, [OlympusWr_IM]>, |
| SchedVar<NoSchedPred, [OlympusWr_IMA]>]>; |
| // 1-cycle read advance that applies only when the producer is OlympusWr_IM or |
| // OlympusWr_IMA; it is attached as the last read in the InstRW lists below. |
| def OlympusRd_IMA : SchedReadAdvance<1, [OlympusWr_IM, OlympusWr_IMA]>; |
| // MADD/MSUB (32- and 64-bit). |
| def : InstRW<[OlympusWr_IMX, ReadIM, ReadIM, OlympusRd_IMA], |
| (instregex "^M(ADD|SUB)[WX]rrr$")>; |
| // SMADDL/SMSUBL/UMADDL/UMSUBL. |
| def : InstRW<[OlympusWr_IMX, ReadIM, ReadIM, OlympusRd_IMA], |
| (instregex "^[SU]M(ADD|SUB)Lrrr$")>; |
| |
| // Multiply high |
| def : InstRW<[OlympusWrite_3c_1M], (instrs SMULHrr, UMULHrr)>; |
| |
| // 3.5 Pointer authentication instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Authenticate address |
| // Compute pointer authentication code |
| def : InstRW<[OlympusWrite_4c_1M0], (instregex "^AUT", "^PAC")>; |
| |
| // Strip pointer authentication code |
| def : InstRW<[OlympusWrite_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>; |
| |
| // Branch, register with authentication |
| def : InstRW<[OlympusWrite_5c_1B_1M0], (instrs BRAA, BRAAZ, BRAB, BRABZ, RETAA, |
| RETAB, ERETAA, ERETAB)>; |
| |
| // Branch and link, register with authentication |
| def : InstRW<[OlympusWrite_5c_1I_1B_1M0], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; |
| |
| // Load, register with authentication |
| def : InstRW<[OlympusWrite_8c_1M0_1L], (instregex "^LDRA[AB]indexed")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1M0_1L], (instregex "^LDRA[AB]writeback")>; |
| |
| // 3.6 Miscellaneous data-processing instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Address generation |
| def : InstRW<[OlympusWrite_1c_1F], (instrs ADR, ADRP)>; |
| |
| // Extract, ROR alias or imms==0 |
| // Extract, other |
| def : SchedAlias<WriteExtr, OlympusWrite_Extr>; |
| |
| // Bitfield move, basic |
| // Count leading |
| // Move immed |
| // Reverse bits/bytes |
| // Variable shift |
| def : SchedAlias<WriteIS, OlympusWrite_1c_1I>; |
| def : InstRW<[OlympusWrite_0or1c_1I], (instregex "^MOVZ[WX]i$")>; |
| |
| // Bitfield move, insert |
| def : InstRW<[OlympusWrite_2c_1M], (instregex "^BFM[WX]ri$")>; |
| |
| // CRC checksum |
| // CRC32* is a 2-cycle write on OlympusUnitM. The paired read advance of 1 |
| // cycle applies only when the producer is another OlympusWr_CRC write. |
| def OlympusWr_CRC : SchedWriteRes<[OlympusUnitM]> { let Latency = 2; } |
| def OlympusRd_CRC : SchedReadAdvance<1, [OlympusWr_CRC]>; |
| def : InstRW<[OlympusWr_CRC, OlympusRd_CRC], (instregex "^CRC32")>; |
| |
| // 3.7 Load instructions |
| // ----------------------------------------------------------------------------- |
| |
| def : SchedAlias<WriteLD, OlympusWrite_4c_1L>; |
| def : SchedAlias<WriteLDIdx, OlympusWrite_4c_1L>; |
| |
| // Load register, possible wback |
| // LDR (immediate) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDR[WX]ui$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L], (instregex "^LDR[WX](pre|post)$")>; |
| // LDRB (immediate) |
| def : InstRW<[OlympusWrite_4c_1L], (instrs LDRBBui)>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L], (instregex "^LDRBB(pre|post)$")>; |
| // LDRH (immediate) |
| def : InstRW<[OlympusWrite_4c_1L], (instrs LDRHHui)>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L], (instregex "^LDRHH(pre|post)$")>; |
| // LDRSB, LDRSH (immediate) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDRS[BH][WX]ui$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L], (instregex "^LDRS[BH][WX](pre|post)$")>; |
| // LDRSW (immediate) |
| def : InstRW<[OlympusWrite_4c_1L], (instrs LDRSWui)>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L], (instregex "^LDRSW(pre|post)$")>; |
| |
| // Load register, bare |
| // LDR (register) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDR[WX]ro[WX]$")>; |
| // LDRB (register) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDRBBro[WX]$")>; |
| // LDRH (register) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDRHHro[WX]$")>; |
| // LDRSB, LDRSH (register) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDRS[BH][WX]ro[WX]$")>; |
| // LDRSW (register) |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDRSWro[WX]$")>; |
| // LDUR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDUR[WX]i$")>; |
| // LDURB |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDURBBi$")>; |
| // LDURH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDURHHi$")>; |
| // LDURSB, LDURSH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDURS[BH][WX]i$")>; |
| // LDURSW |
| def : InstRW<[OlympusWrite_4c_1L], (instrs LDURSWi)>; |
| // LDAR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAR[WX]$")>; |
| // LDARB, LDARH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAR[BH]$")>; |
| // LDAPR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPR[WX]$")>; |
| // LDAPRB, LDAPRH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPR[BH]$")>; |
| // LDAPUR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPURX?i$")>; |
| // LDAPURB, LDAPURH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPUR[BH]i$")>; |
| // LDAPURSB, LDAPURSH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPURS[BH][WX]i$")>; |
| // LDAPURSW |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAPURSWi$")>; |
| // LDLAR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDLAR[WX]$")>; |
| // LDLARB, LDLARH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDLAR[BH]$")>; |
| // LDTR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDTR[WX]i$")>; |
| // LDTRB, LDTRH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDTR[BH]i$")>; |
| // LDTRSB, LDTRSH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDTRS[BH][WX]i$")>; |
| // LDTRSW |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDTRSWi$")>; |
| // LDXR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDXR[WX]$")>; |
| // LDXRB, LDXRH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDXR[BH]$")>; |
| // LDAXR |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAXR[WX]$")>; |
| // LDAXRB, LDAXRH |
| def : InstRW<[OlympusWrite_4c_1L], (instregex "^LDAXR[BH]$")>; |
| |
| // Load pair, possible wback |
| // LDP |
| def : InstRW<[OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDP[WX]i$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDP[WX](pre|post)$")>; |
| // LDPSW |
| def : InstRW<[OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDPSWi$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDPSW(pre|post)$")>; |
| |
| // Load pair, bare |
| // LDNP |
| def : InstRW<[OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDNP[WX]i$")>; |
| // LDXP, LDAXP |
| def : InstRW<[OlympusWrite_4c_1L, WriteLDHi], (instregex "^LDA?XP[WX]$")>; |
| |
| // Load literal |
| // LDR (literal) |
| // LDRSW (literal) |
| // PRFM (literal) |
| def : InstRW<[OlympusWrite_5c_1I_1L], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>; |
| |
| // Load allocation tag |
| // LDG |
| def : InstRW<[OlympusWrite_5c_1I_1L], (instrs LDG, LDGM)>; |
| |
| // 3.8 Store instructions |
| // ----------------------------------------------------------------------------- |
| |
| def : SchedAlias<WriteST, OlympusWrite_1c_1SA_1D>; |
| def : SchedAlias<WriteSTIdx, OlympusWrite_1c_1SA_1D>; |
| def : SchedAlias<WriteSTP, OlympusWrite_1c_1SA_1D>; |
| def : SchedAlias<WriteAdr, OlympusWrite_1c_1I>; |
| |
| // Store register, possible wback |
| // STR (immediate) |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STR[WX]ui$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_1c_1SA_1D], (instregex "^STR[WX](pre|post)$")>; |
| // STRB (immediate) |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instrs STRBBui)>; |
| def : InstRW<[WriteAdr, OlympusWrite_1c_1SA_1D], (instregex "^STRBB(pre|post)$")>; |
| // STRH (immediate) |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instrs STRHHui)>; |
| def : InstRW<[WriteAdr, OlympusWrite_1c_1SA_1D], (instregex "^STRHH(pre|post)$")>; |
| |
| // Store register, scaled by 2 |
| // STRH (register) |
| def : InstRW<[OlympusWrite_ScaledSTRH], (instregex "^STRHHro[WX]$")>; |
| |
| // Store register, bare |
| // STR (register) |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STR[WX]ro[WX]$")>; |
| // STRB (register) |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STRBBro[WX]$")>; |
| // STUR |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STUR[WX]i$")>; |
| // STURB |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STURBBi$")>; |
| // STURH |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STURHHi$")>; |
| // STLR |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLR[WX]$")>; |
| // STLRB, STLRH |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLR[BH]$")>; |
| // STLLR |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLLR[WX]$")>; |
| // STLLRB, STLLRH |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLLR[BH]$")>; |
| // STLUR |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLUR[WX]i$")>; |
| // STLURB, STLURH |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STLUR[BH]i$")>; |
| // STTR |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STTR[WX]i$")>; |
| // STTRB, STTRH |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STTR[BH]i$")>; |
| |
| // Store pair, general |
| // STP |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STP[WX]i$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_1c_1SA_1D], (instregex "^STP[WX](pre|post)$")>; |
| |
| // Store pair, non-temporal |
| // STNP |
| def : InstRW<[OlympusWrite_1c_1SA_1D], (instregex "^STNP[WX]i$")>; |
| |
| // Store exclusive |
| // STXR |
| def : InstRW<[OlympusWrite_4c_1SA_1D], (instregex "^STXR[WX]$")>; |
| // STXRB, STXRH |
| def : InstRW<[OlympusWrite_4c_1SA_1D], (instregex "^STXR[BH]$")>; |
| // STLXR |
| def : InstRW<[OlympusWrite_4c_1SA_1D], (instregex "^STLXR[WX]$")>; |
| // STLXRB, STLXRH |
| def : InstRW<[OlympusWrite_4c_1SA_1D], (instregex "^STLXR[BH]$")>; |
| // STXP, STLXP |
| def : InstRW<[OlympusWrite_4c_1SA_1D], (instregex "^STL?XP[WX]$")>; |
| |
| // Store allocation tag |
| // ST2G |
| // STG |
| // STZ2G |
| // STZG |
| // STGP |
| // Tag stores without writeback. |
| def : InstRW<[OlympusWrite_1c_1SA_1D], |
|   (instrs STGi, ST2Gi, STZGi, STZ2Gi, STGPi, STGM, STZGM)>; |
| // Tag stores with pre/post-index writeback; the first write (WriteAdr) covers |
| // the updated base register. |
| def : InstRW<[WriteAdr, OlympusWrite_1c_1SA_1D], |
|   (instregex |
|     "^ST2?G(Pre|Post)Index$", |
|     "^STZ2?G(Pre|Post)Index$", |
|     "^STGP(pre|post)$")>; |
| |
| // 3.9 Scalar/SIMD floating point instructions |
| // ----------------------------------------------------------------------------- |
| |
| // FP general |
| def : SchedAlias<WriteF, OlympusWrite_2c_1V>; |
| def : SchedAlias<WriteFImm, OlympusWrite_2c_1V>; |
| |
| // FMOV (scalar, immediate) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FMOV[HSD]i$")>; |
| // FMOV (register) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FMOV[HSD]r$")>; |
| // FMOV (vector, immediate) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FMOV(v4f16|v8f16|v2f32|v4f32|v2f64)_ns$")>; |
| // FCSEL |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FCSEL[HSD]rrr$")>; |
| // FABS (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FABS[HSD]r$")>; |
| // FABD |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FABD(16|32|64)$")>; |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FABD(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FABS (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FABS(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FNEG (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FNEG[HSD]r$")>; |
| // FNEG (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FNEG(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FADD (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FADD[HSD]rr$")>; |
| // FADD (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FADD(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FADDP (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FADDP(v2i16|v2i32|v2i64)p$")>; |
| // FADDP (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FADDP(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FCADD |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FCADD(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FSUB (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FSUB[HSD]rr$")>; |
| // FSUB (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FSUB(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FAC<cc> |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FACG[ET](16|32|64)$")>; |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FACG[ET](v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FCM<cc> (register) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FCM(EQ|GE|GT)(16|32|64)$")>; |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FCM(EQ|GE|GT)(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FCM<cc> (zero) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FCM(EQ|GE|GT|LE|LT)(v1i16|v1i32|v1i64|v4i16|v8i16|v2i32|v4i32|v2i64)rz$")>; |
| // FMAX/FMIN(NM) (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(FMAX|FMIN)(NM)?(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FMAX/FMIN(NM) (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(FMAX|FMIN)(NM)?[HSD]rr$")>; |
| // FMAX(NM)P/FMIN(NM)P (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(FMAX|FMIN)(NM)?P(v2i16|v2i32|v2i64)p$")>; |
| // FMAXNMP/FMINNMP (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(FMAX|FMIN)NMP(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FMAXP/FMINP (vector) |
| // Both the pairwise maximum and the pairwise minimum are matched here, in |
| // line with the (FMAX|FMIN) alternation used by the neighbouring entries. |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(FMAX|FMIN)P(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FAMAX/FAMIN |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^FA(MAX|MIN)(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| |
| // FP compare |
| def : SchedAlias<WriteFCmp, OlympusWrite_2c_1V03>; |
| // FCCMP, FCCMPE |
| def : InstRW<[OlympusWrite_2c_1V03], (instregex "^FCCMPE?[HSD]rr$")>; |
| // FCMP, FCMPE |
| def : InstRW<[OlympusWrite_2c_1V03], (instregex "^FCMPE?[HSD](rr|ri)$")>; |
| |
| // FP multiply |
| def : WriteRes<WriteFMul, [OlympusUnitV]> { let Latency = 3; } |
| def OlympusWr_FMul : SchedWriteRes<[OlympusUnitV]> { let Latency = 3; } |
| // FMUL, FNMUL (scalar) |
| def : InstRW<[OlympusWr_FMul], (instregex "^FN?MUL[HSD]rr$")>; |
| // FMUL, FMULX (by element) |
| def : InstRW<[OlympusWr_FMul], (instregex "^FMULX?(v1i16|v1i32|v1i64|v4i16|v8i16|v2i32|v4i32|v2i64)_indexed$")>; |
| // FMUL (vector) |
| def : InstRW<[OlympusWr_FMul], (instregex "^FMUL(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FMULX |
| def : InstRW<[OlympusWr_FMul], (instregex "^FMULX(16|32|64)$")>; |
| def : InstRW<[OlympusWr_FMul], (instregex "^FMULX(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| // FSCALE |
| def : InstRW<[OlympusWr_FMul], (instregex "^FSCALE(v4f16|v8f16|v2f32|v4f32|v2f64)$")>; |
| |
| // FP multiply accumulate |
| // FP multiply-accumulate is a 4-cycle write on OlympusUnitV. |
| def OlympusWr_VFMA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| // Read advance that applies when the producer is OlympusWr_FMul or |
| // OlympusWr_VFMA. |
| // NOTE(review): the base advance is 0 and a third list [1, 2] is supplied; |
| // presumably these are per-producer advance cycles in the same order as the |
| // write list -- confirm against SchedReadAdvance in TargetSchedule.td. |
| def OlympusRd_VFMA : SchedReadAdvance<0, [OlympusWr_FMul, OlympusWr_VFMA], [1, 2]>; |
| // FN?MADD, FN?MSUB |
| // The first two source reads use ReadDefault; only the last read gets the |
| // forwarding advance. |
| def : InstRW<[OlympusWr_VFMA, ReadDefault, ReadDefault, OlympusRd_VFMA], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; |
| // FCMLA (by element) |
| def : InstRW<[OlympusWr_VFCMA, OlympusRd_VFCMA], (instregex "^FCMLA(v4f16|v8f16|v4f32)_indexed$")>; |
| // FCMLA |
| def : InstRW<[OlympusWr_VFCMA, OlympusRd_VFCMA], (instregex "^FCMLA(v4f16|v2f32|v8f16|v4f32|v2f64)$")>; |
| // FMLA, FMLS (by element) |
| def : InstRW<[OlympusWr_VFMA, OlympusRd_VFMA], (instregex "^FML[AS]v.+_indexed$")>; |
| // FMLA, FMLS (vector) |
| def : InstRW<[OlympusWr_VFMA, OlympusRd_VFMA], (instregex "^FML[AS](v4f16|v2f32|v8f16|v4f32|v2f64)$")>; |
| // FMLAL, FMLAL2 (by element) |
| // FMLSL, FMLSL2 (by element) |
| def : InstRW<[OlympusWr_VFMAL, OlympusRd_VFMAL], (instregex "^FML[AS]L2?lanev")>; |
| // FMLAL, FMLAL2 (vector) |
| // FMLSL, FMLSL2 (vector) |
| def : InstRW<[OlympusWr_VFMAL, OlympusRd_VFMAL], (instregex "^FML[AS]L2?(v4f16|v2f32|v8f16|v4f32)$")>; |
| |
| // FP reduction, DP/SP/(HP 64b) |
| // FMAXNMV, FMAXV, FMINNMV, FMINV |
| def : InstRW<[OlympusWrite_4c_2V], (instregex "^(FMAX|FMIN)(NM)?V(v4i16|v4i32)v$")>; |
| |
| // FP reduction, HP 128b |
| // FMAXNMV, FMAXV, FMINNMV, FMINV |
| def : InstRW<[OlympusWrite_6c_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; |
| |
| // FP divide/sqrt DP, scalar |
| def : SchedAlias<WriteFDiv, OlympusWrite_8c_1V12>; |
| // FDIV, FSQRT (scalar) |
| def : InstRW<[OlympusWrite_13c_1V12], (instrs FDIVDrr, FSQRTDr)>; |
| |
| // FP divide/sqrt SP, scalar |
| // FDIV, FSQRT (scalar) |
| def : InstRW<[OlympusWrite_8c_1V12], (instrs FDIVSrr, FSQRTSr)>; |
| |
| // FP divide/sqrt HP, scalar |
| // FDIV, FSQRT (scalar) |
| def : InstRW<[OlympusWrite_6c_1V12], (instrs FDIVHrr, FSQRTHr)>; |
| |
| // FP divide/sqrt, vector DP 128b |
| // FDIV, FSQRT (vector) |
| def : InstRW<[OlympusWrite_14c_1V12_2], (instrs FDIVv2f64, FSQRTv2f64)>; |
| |
| // FP divide/sqrt, vector SP 128b |
| // FDIV, FSQRT (vector) |
| def : InstRW<[OlympusWrite_11c_1V12_4], (instrs FDIVv4f32, FSQRTv4f32)>; |
| |
| // FP divide/sqrt, vector SP 64b |
| // FDIV, FSQRT (vector) |
| def : InstRW<[OlympusWrite_9c_1V12_2], (instrs FDIVv2f32, FSQRTv2f32)>; |
| |
| // FP divide/sqrt, vector HP 128b |
| // FDIV, FSQRT (vector) |
| def : InstRW<[OlympusWrite_13c_1V12_8], (instrs FDIVv8f16, FSQRTv8f16)>; |
| |
| // FP divide/sqrt, vector HP 64b |
| // FDIV, FSQRT (vector) |
| def : InstRW<[OlympusWrite_9c_1V12_4], (instrs FDIVv4f16, FSQRTv4f16)>; |
| |
| // FP round to int, scalar |
| // FRINT<cc> (scalar) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRINT[AIMNPXZ][HSD]r$")>; |
| // FRINT32X (scalar) |
| // FRINT32Z (scalar) |
| // FRINT64X (scalar) |
| // FRINT64Z (scalar) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRINT(32|64)[XZ][SD]r$")>; |
| |
| // FP round to int, vector, DP 128b or SP 64b |
| // FRINT<cc> (vector) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRINT[AIMNPXZ](v2f64|v2f32)$")>; |
| // FRINT32X (vector) |
| // FRINT32Z (vector) |
| // FRINT64X (vector) |
| // FRINT64Z (vector) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRINT(32|64)[XZ](v2f32|v2f64)$")>; |
| |
| // FP round to int, vector, SP 128b or HP 64b |
| // FRINT<cc> (vector) |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FRINT[AIMNPXZ](v4f32|v4f16)$")>; |
| // FRINT32X (vector) |
| // FRINT32Z (vector) |
| // FRINT64X (vector) |
| // FRINT64Z (vector) |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FRINT(32|64)[XZ]v4f32$")>; |
| |
| // FP round to int, vector, HP 128b |
| // FRINT<cc> (vector) |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^FRINT[AIMNPXZ]v8f16$")>; |
| |
| // FP convert, scalar |
| // FCVT |
| def : InstRW<[OlympusWrite_3c_1V0123], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr, FCVTHDr, FCVTSDr)>; |
| |
| // FCVTAS (vector) |
| // FCVTAU (vector) |
| // FCVTMS (vector) |
| // FCVTMU (vector) |
| // FCVTNS (vector) |
| // FCVTNU (vector) |
| // FCVTPS (vector) |
| // FCVTPU (vector) |
| // FCVTZS (vector, integer) |
| // FCVTZU (vector, integer) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVT[AMNPZ][SU](v1i32|v1i64)$")>; |
| // FCVTZS (vector, fixed-point) |
| // FCVTZU (vector, fixed-point) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVTZ[SU][dsh]$")>; |
| // SCVTF (vector, integer) |
| // UCVTF (vector, integer) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^[SU]CVTF(v1i16|v1i32|v1i64)$")>; |
| // SCVTF (vector, fixed-point) |
| // UCVTF (vector, fixed-point) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^[SU]CVTF[dsh]$")>; |
| // FCVTXN |
| def : InstRW<[OlympusWrite_3c_1V0123], (instrs FCVTXNv1i64)>; |
| |
| // FP convert, vector between F32 and F64 |
| // FCVTL, FCVTL2 |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVTL(v2i32|v4i32)$")>; |
| // FCVTN, FCVTN2 |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVTN(v2i32|v4i32)$")>; |
| |
| // FP convert, vector F32 to F16 |
| // FCVTN, FCVTN2 |
| // NOTE(review): this entry uses OlympusWrite_4c_1V0123, whereas the F16 to F32 |
| // FCVTL entry and the other 4-cycle vector conversions in this section use |
| // OlympusWrite_4c_1V0123_2 -- confirm against the SWOG that the difference is |
| // intentional. |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^FCVTN(v4i16|v8i16)$")>; |
| |
| // FP convert, vector F16 to F32 |
| // FCVTL, FCVTL2 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FCVTL(v4i16|v8i16)$")>; |
| |
| // FP convert, Javascript |
| def : SchedAlias<WriteFCvt, OlympusWrite_4c_1V03>; |
| // FJCVTZS |
| def : InstRW<[OlympusWrite_4c_1V03], (instrs FJCVTZS)>; |
| |
| // FP convert, vector, DP 128b or SP 64b |
| // FCVT<cc> (vector) |
| // FCVTZS (vector, integer) |
| // FCVTZU (vector, integer) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVT[AMNPZ][SU](v2f32|v2f64)$")>; |
| // FCVTZS (vector, fixed-point) |
| // FCVTZU (vector, fixed-point) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FCVTZ[SU](v2i32|v2i64)_shift$")>; |
| |
| // SCVTF (vector, integer) |
| // UCVTF (vector, integer) |
| // SCVTF (vector, fixed-point) |
| // UCVTF (vector, fixed-point) |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^[SU]CVTF(v2f32|v2f64)$", |
| "^[SU]CVTF(v2i32|v2i64)_shift$")>; |
| // FCVTXN, FCVTXN2 |
| def : InstRW<[OlympusWrite_3c_1V0123], (instrs FCVTXNv2f32, FCVTXNv4f32)>; |
| |
| // FP convert, vector, SP 128b or HP 64b |
| // FCVT<cc> (vector) |
| // FCVTZS (vector, integer) |
| // FCVTZU (vector, integer) |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FCVT[AMNPZ][SU](v4f32|v4f16)$")>; |
| // FCVTZS (vector, fixed-point) |
| // FCVTZU (vector, fixed-point) |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FCVTZ[SU](v4i16|v4i32)_shift$")>; |
| |
| // SCVTF (vector, integer) |
| // UCVTF (vector, integer) |
| // SCVTF (vector, fixed-point) |
| // UCVTF (vector, fixed-point) |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^[SU]CVTF(v4f16|v4f32)$", |
| "^[SU]CVTF(v4i16|v4i32)_shift$")>; |
| |
| // FP convert, vector, HP 128b |
| // FCVT<cc> (vector) |
| // FCVTZS (vector, integer) |
| // FCVTZU (vector, integer) |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^FCVT[AMNPZ][SU]v8f16$", |
| "^FCVT[AMNPZ][SU]v1f16$")>; |
| // FCVTZS (vector, fixed-point) |
| // FCVTZU (vector, fixed-point) |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^FCVTZ[SU]v8i16_shift$")>; |
| |
| // SCVTF (vector, integer) |
| // UCVTF (vector, integer) |
| // SCVTF (vector, fixed-point) |
| // UCVTF (vector, fixed-point) |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^[SU]CVTFv8f16$", |
| "^[SU]CVTFv8i16_shift$")>; |
| |
| // FP convert from gen to vec reg |
| // SCVTF (scalar, integer) |
| // UCVTF (scalar, integer) |
| // SCVTF (scalar, fixed-point) |
| // UCVTF (scalar, fixed-point) |
| def : InstRW<[OlympusWrite_5c_1M_1V], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; |
| |
| // FP convert, from vec to gen reg |
| // FCVT<cc> (scalar) |
| // FCVTZS (scalar, integer) |
| // FCVTZU (scalar, integer) |
| def : InstRW<[OlympusWrite_4c_1V03], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; |
| // FCVTZS (scalar, fixed-point) |
| // FCVTZU (scalar, fixed-point) |
| def : InstRW<[OlympusWrite_4c_1V03], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri$")>; |
| |
| // FP reciprocal and square root estimate, scalar |
| // FRECPX |
| // FRECPE |
| // FRSQRTE |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRECPX(v1f16|v1i32|v1i64)$")>; |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRECPE(v1f16|v1i32|v1i64)$")>; |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRSQRTE(v1f16|v1i32|v1i64)$")>; |
| |
| // FP reciprocal and square root estimate, vector, DP 128b or SP 64b |
| // FRECPE |
| // FRSQRTE |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRECPE(v2f64|v2f32)$")>; |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^FRSQRTE(v2f64|v2f32)$")>; |
| |
| // FP reciprocal and square root estimate, vector, SP 128b or HP 64b |
| // FRECPE |
| // FRSQRTE |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FRECPE(v4f32|v4f16)$")>; |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^FRSQRTE(v4f32|v4f16)$")>; |
| |
| // FP reciprocal and square root estimate, vector, HP 128b |
| // FRECPE |
| // FRSQRTE |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^FRECPEv8f16$")>; |
| def : InstRW<[OlympusWrite_6c_1V0123_4], (instregex "^FRSQRTEv8f16$")>; |
| |
| // FP reciprocal and square root step |
| // FRECPS |
| // FRSQRTS |
| // Scalar (16/32/64) and vector (v*) forms share one entry. Both source reads |
| // use OlympusRd_FRS, so either operand can take the forwarding advance when it |
| // is produced by another OlympusWr_FRS write. |
| def : InstRW<[OlympusWr_FRS, OlympusRd_FRS, OlympusRd_FRS], |
| (instregex "^FRECPS(16|32|64|v)", "^FRSQRTS(16|32|64|v)")>; |
| |
| // FP mov, from vec to gen reg |
| def : SchedAlias<WriteFCopy, OlympusWrite_3c_1V03>; |
| def : InstRW<[OlympusWrite_3c_1V03], (instrs FMOVSWr, FMOVHWr, FMOVDXr, FMOVHXr)>; |
| |
| // FP mov, from gen to low half of vec reg |
| def : InstRW<[OlympusWrite_0or3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; |
| |
| // FP mov, from gen to high half of vec |
| def : InstRW<[OlympusWrite_5c_1M_1V], (instrs FMOVXDHighr)>; |
| |
| // 3.10 SIMD FP8 instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Convert to/from F16 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^F[12]CVTL2?$", |
| "^FCVTN_F16v(8|16)f8$")>; |
| |
| // Convert from F32 |
| def : InstRW<[OlympusWrite_3c_1V0123], (instrs FCVTN_F32v8f8, FCVTN_F322v16f8)>; |
| |
| // Dot product, multiply accumulate |
| // Shared by the FP8 FDOT and FMLAL*/FMLALL* entries below: a 4-cycle write on |
| // OlympusUnitV, with a 2-cycle read advance that applies only when the |
| // producer is another OlympusWr_F8MA write. |
| def OlympusWr_F8MA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_F8MA : SchedReadAdvance<2, [OlympusWr_F8MA]>; |
| // FDOT (8-bit floating-point to half-precision, vector) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FDOTv(4|8)f16$")>; |
| // FDOT (8-bit floating-point to single-precision, vector) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FDOTv(2|4)f32$")>; |
| // FDOT (8-bit floating-point to half-precision, by element) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FDOTlanev(4|8)f16$")>; |
| // FDOT (8-bit floating-point to single-precision, by element) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FDOTlanev(2|4)f32$")>; |
| // FMLALB, FMLALT (vector) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FMLAL[BT]v16i8_v8f16$")>; |
| // FMLALLBB, FMLALLBT, FMLALLTB, FMLALLTT (vector) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FMLALL[BT][BT]v4f32$")>; |
| // FMLALB, FMLALT (by element) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FMLAL[BT]lanev8f16$")>; |
| // FMLALLBB, FMLALLBT, FMLALLTB, FMLALLTT (by element) |
| def : InstRW<[OlympusWr_F8MA, OlympusRd_F8MA], (instregex "^FMLALL[BT][BT]lanev4f32$")>; |
| |
| // 3.11 SIMD BF16 instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Convert from F8 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instregex "^BF[12]CVTL2?$")>; |
| |
| // Convert from F32, vector |
| def : InstRW<[OlympusWrite_4c_1V0123_2], (instrs BFCVTN, BFCVTN2)>; |
| |
| // Convert from F32, scalar |
| def : InstRW<[OlympusWrite_3c_1V0123], (instrs BFCVT)>; |
| |
| // Multiply accumulate |
| def OlympusWr_BF16MA : SchedWriteRes<[OlympusUnitV]> { let Latency = 4; } |
| def OlympusRd_BF16MA : SchedReadAdvance<2, [OlympusWr_BF16MA]>; |
| def : InstRW<[OlympusWr_BF16MA, OlympusRd_BF16MA], (instregex "^BFMLAL[BT](Idx)?$")>; |
| |
| // Dot product |
| // BF16 dot product is a 5-cycle write on OlympusUnitV, with a 2-cycle read |
| // advance that applies only when the producer is another OlympusWr_BF16DOT |
| // write. The two regexes cover the vector and the by-element (lane) forms. |
| def OlympusWr_BF16DOT : SchedWriteRes<[OlympusUnitV]> { let Latency = 5; } |
| def OlympusRd_BF16DOT : SchedReadAdvance<2, [OlympusWr_BF16DOT]>; |
| def : InstRW<[OlympusWr_BF16DOT, OlympusRd_BF16DOT], (instregex "^BFDOTv[48]bf16$", |
| "^BF16DOTlanev[48]bf16$")>; |
| |
| // Matrix multiply accumulate |
| def OlympusWr_BF16MMA : SchedWriteRes<[OlympusUnitV]> { let Latency = 6; } |
| def OlympusRd_BF16MMA : SchedReadAdvance<2, [OlympusWr_BF16MMA]>; |
| def : InstRW<[OlympusWr_BF16MMA, OlympusRd_BF16MMA], (instrs BFMMLA)>; |
| |
| // 3.12 SIMD integer instructions |
| // ----------------------------------------------------------------------------- |
| |
| // General: the WriteVd and WriteVq vector writes both alias to OlympusWrite_2c_1V |
| def : SchedAlias<WriteVd, OlympusWrite_2c_1V>; |
| def : SchedAlias<WriteVq, OlympusWrite_2c_1V>; |
| |
| // ABS |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ABS(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // ADD (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ADD(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // ADDHN, ADDHN2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ADDHN(v8i16_v8i8|v8i16_v16i8|v4i32_v4i16|v2i64_v2i32|v4i32_v8i16|v2i64_v4i32)$")>; |
| // ADDP (scalar) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ADDPv2i64p$")>; |
| // ADDP (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ADDP(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // AND (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^AND(v8i8|v16i8)$")>; |
| // BIC (vector, immediate) |
| // BIC (vector, register) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^BIC(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // BIF |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^BIF(v8i8|v16i8)$")>; |
| // BIT |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^BIT(v8i8|v16i8)$")>; |
| // BSL |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^BSL(v8i8|v16i8)$")>; |
| // CLS (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CLS(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // CLZ (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CLZ(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // CMEQ, CMGE, CMGT, CMHI, CMHS (register) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CM(EQ|GE|GT|HI|HS)(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // CMEQ, CMGE, CMGT, CMLE, CMLT (zero) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CM(EQ|GE|GT|LE|LT)(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)rz$")>; |
| // CMTST |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CMTST(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // CNT |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^CNT(v8i8|v16i8)$")>; |
| // DUP (element): scalar DUPi* opcodes and vector DUP*lane opcodes |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^DUP(i8|i16|i32|i64)$", |
| "^DUP(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)lane$")>; |
| // EOR (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^EOR(v8i8|v16i8)$")>; |
| // EXT |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^EXT(v8i8|v16i8)$")>; |
| // INS (element) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^INS(vi8|vi16|vi32|vi64)lane$")>; |
| // LUTI2, LUTI4 (no trailing '$': prefix match on LUT2_/LUT4_ followed by B or H) |
| def : InstRW<[OlympusWrite_3c_1V], (instregex "^LUT[24]_[BH]")>; |
| // MOVI (MOVID and MOVIv2d_ns use OlympusWrite_0or2c_1V instead of OlympusWrite_2c_1V) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^MOVI(v2i32|v4i16|v4i32|v8i16)$")>; |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^MOVI(v2s_msl|v4s_msl|v8b_ns|v16b_ns)$")>; |
| def : InstRW<[OlympusWrite_0or2c_1V], (instrs MOVID, MOVIv2d_ns)>; |
| // MVNI |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^MVNI(v2i32|v4i16|v4i32|v8i16)$")>; |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^MVNI(v2s_msl|v4s_msl)$")>; |
| // NEG (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^NEG(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // NOT |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^NOT(v8i8|v16i8)$")>; |
| // ORN (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ORN(v8i8|v16i8)$")>; |
| // ORR (vector, immediate) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ORR(v4i32|v8i16|v2i32|v4i16)$")>; |
| // ORR (vector, register) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^ORR(v8i8|v16i8)$")>; |
| // RADDHN, RADDHN2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^RADDHN(v8i16_v8i8|v8i16_v16i8|v4i32_v4i16|v2i64_v2i32|v4i32_v8i16|v2i64_v4i32)$")>; |
| // RBIT (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^RBIT(v8i8|v16i8)$")>; |
| // REV16 (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^REV16(v8i8|v16i8)$")>; |
| // REV32 (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^REV32(v8i8|v4i16|v16i8|v8i16)$")>; |
| // REV64 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^REV64(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // RSUBHN, RSUBHN2, SUBHN, SUBHN2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^R?SUBHN(v8i16_v8i8|v8i16_v16i8|v4i32_v4i16|v2i64_v2i32|v4i32_v8i16|v2i64_v4i32)$")>; |
| // SHL |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SHLd$", "^SHL(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SHLL, SHLL2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SHLL(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SHRN, SHRN2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SHRN(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // SLI, SRI |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^S(LI|RI)d$", "^S(LI|RI)(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SUB (vector) |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SUB(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // TRN1, TRN2 |
| // UZP1, UZP2 |
| // ZIP1, ZIP2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(TRN|UZP|ZIP)[12](v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // XTN, XTN2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^XTN(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SABD |
| // UABD |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]ABD(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SABDL, SABDL2 |
| // UABDL, UABDL2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]ABDL(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SADDL, SADDL2 |
| // UADDL, UADDL2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]ADDL(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SADDLP, UADDLP |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]ADDLP(v8i8_v4i16|v4i16_v2i32|v2i32_v1i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SADDW, SADDW2 |
| // UADDW, UADDW2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]ADDW(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SHADD, SRHADD |
| // UHADD, URHADD |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]R?HADD(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SHSUB |
| // UHSUB |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]HSUB(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SMIN, SMAX |
| // SMINP, SMAXP |
| // UMIN, UMAX |
| // UMINP, UMAXP |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU](MIN|MAX)P?(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SQADD (the pattern below covers both the v1i* opcodes and the multi-element vector opcodes) |
| // UQADD |
| // SQSUB |
| // UQSUB |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]Q(ADD|SUB)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SQABS |
| // SQNEG |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SQ(ABS|NEG)(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SSHL |
| // USHL |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]SHL(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SSHLL, SSHLL2 |
| // USHLL, USHLL2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]SHLL(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // SSHR |
| // USHR |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^SSHRd$", "^USHRd$", "^[SU]SHR(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SSUBL, SSUBL2 |
| // SSUBW, SSUBW2 |
| // USUBL, USUBL2 |
| // USUBW, USUBW2 |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^[SU]SUB[LW](v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SUQADD |
| // USQADD |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^(SU|US)QADD(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| |
| // Shift |
| // |
| // Rounding and/or saturating shifts plus the saturating narrows. Every entry |
| // in this group is assigned OlympusWrite_4c_1V. The scalar patterns that end |
| // in [bhs] / [bhsd] carry no '$' and therefore act as prefix matches. |
| |
| // RSHRN, RSHRN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^RSHRN(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // SQRSHL |
| // UQRSHL |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^[SU]QRSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SQRSHRN, SQRSHRN2 |
| // SQRSHRUN, SQRSHRUN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^SQRSHRU?N[bhs]", "^SQRSHRU?N(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // UQRSHRN, UQRSHRN2 |
| // NOTE: A64 has no UQRSHRUN, so unlike the signed SQRSHRU?N patterns above |
| // neither pattern here takes an optional 'U'. |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^UQRSHRN[bhs]", "^UQRSHRN(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // SQSHL (immediate) |
| // UQSHL (immediate) |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^[SU]QSHL[bhsd]", "^[SU]QSHL(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SQSHL (register) |
| // UQSHL (register) |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SQSHLU |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^SQSHLU[bhsd]", "^SQSHLU(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SQSHRN, SQSHRN2 |
| // SQSHRUN, SQSHRUN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^SQSHRU?N[bhs]", "^SQSHRU?N(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // UQSHRN, UQSHRN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^UQSHRN[bhs]", "^UQSHRN(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)_shift$")>; |
| // SRSHL |
| // URSHL |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^[SU]RSHL(v8i8|v4i16|v2i32|v1i64|v16i8|v8i16|v4i32|v2i64)$")>; |
| // SRSHR |
| // URSHR |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^[SU]RSHRd$", "^[SU]RSHR(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SQXTN, SQXTN2 |
| // SQXTUN, SQXTUN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^SQXTU?N(v1i8|v1i16|v1i32|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // UQXTN, UQXTN2 |
| def : InstRW<[OlympusWrite_4c_1V], (instregex "^UQXTN(v1i8|v1i16|v1i32|v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| |
| // Multiply (every entry in this group is assigned OlympusWrite_4c_1V0123) |
| |
| // MUL (vector) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^MUL(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // MUL (by element) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^MUL(v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SMULL, SMULL2 (vector) |
| // UMULL, UMULL2 (vector) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^[SU]MULL(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SMULL, SMULL2 (by element) |
| // UMULL, UMULL2 (by element) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^[SU]MULL(v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SQDMULH (vector; the pattern also lists the v1i16/v1i32 opcodes) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQDMULH(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)$")>; |
| // SQDMULL, SQDMULL2 (vector; the first pattern adds the SQDMULLi16/SQDMULLi32 opcodes) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQDMULL(i16|i32)$", "^SQDMULL(v4i16_v4i32|v2i32_v2i64|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SQRDMULH (vector; the pattern also lists the v1i16/v1i32 opcodes) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQRDMULH(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)$")>; |
| // SQDMULH (by element) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQDMULH(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SQDMULL, SQDMULL2 (by element) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQDMULL(v4i16|v2i32|v8i16|v4i32|v1i32|v1i64)_indexed$")>; |
| // SQRDMULH (by element) |
| def : InstRW<[OlympusWrite_4c_1V0123], (instregex "^SQRDMULH(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| |
| // Multiply accumulate: every entry pairs OlympusWr_VMA (latency 4) with OlympusRd_VMA, a ReadAdvance of 2 valid only against OlympusWr_VMA |
| |
| // MLA (vector) |
| // MLS (vector) |
| def OlympusWr_VMA : SchedWriteRes<[OlympusUnitV0123]> { let Latency = 4; } |
| def OlympusRd_VMA : SchedReadAdvance<2, [OlympusWr_VMA]>; |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^ML[AS](v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SMLAL, SMLAL2 (vector) |
| // SMLSL, SMLSL2 (vector) |
| // UMLAL, UMLAL2 (vector) |
| // UMLSL, UMLSL2 (vector) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^[SU]ML[AS]L(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // MLA (by element) |
| // MLS (by element) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^ML[AS](v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SMLAL, SMLAL2 (by element) |
| // SMLSL, SMLSL2 (by element) |
| // UMLAL, UMLAL2 (by element) |
| // UMLSL, UMLSL2 (by element) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^[SU]ML[AS]L(v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SQDMLAL, SQDMLAL2 (vector; the pattern also lists the i16/i32 opcodes) |
| // SQDMLSL, SQDMLSL2 (vector) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^SQDML[AS]L(i16|i32|v4i16_v4i32|v2i32_v2i64|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SQRDMLAH (vector) |
| // SQRDMLSH (vector) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^SQRDML[AS]H(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)$")>; |
| // SQDMLAL, SQDMLAL2 (by element) |
| // SQDMLSL, SQDMLSL2 (by element) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^SQDML[AS]L(v1i32|v1i64|v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| // SQRDMLAH (by element) |
| // SQRDMLSH (by element) |
| def : InstRW<[OlympusWr_VMA, OlympusRd_VMA], (instregex "^SQRDML[AS]H(v1i16|v1i32|v4i16|v2i32|v8i16|v4i32)_indexed$")>; |
| |
| // Abs diff/shift accumulate (the OlympusWr_VA/VPA/VSA writes and their OlympusRd_* reads are defined outside this group) |
| // SABA |
| // UABA |
| def : InstRW<[OlympusWr_VA, OlympusRd_VA], (instregex "^[SU]ABA(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32)$")>; |
| // SABAL, SABAL2 |
| // UABAL, UABAL2 |
| def : InstRW<[OlympusWr_VA, OlympusRd_VA], (instregex "^[SU]ABAL(v8i8_v8i16|v4i16_v4i32|v2i32_v2i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SADALP |
| // UADALP |
| def : InstRW<[OlympusWr_VPA, OlympusRd_VPA], (instregex "^[SU]ADALP(v8i8_v4i16|v4i16_v2i32|v2i32_v1i64|v16i8_v8i16|v8i16_v4i32|v4i32_v2i64)$")>; |
| // SRSRA |
| // URSRA |
| def : InstRW<[OlympusWr_VSA, OlympusRd_VSA], (instregex "^[SU]RSRAd$", "^[SU]RSRA(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| // SSRA |
| // USRA |
| def : InstRW<[OlympusWr_VSA, OlympusRd_VSA], (instregex "^[SU]SRAd$", "^[SU]SRA(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)_shift$")>; |
| |
| // Dot product, matrix multiply (OlympusWr_VDOT/VMMA and their OlympusRd_* reads are likewise defined outside this group) |
| |
| // SDOT (vector) |
| // UDOT (vector) |
| // USDOT (vector) |
| def : InstRW<[OlympusWr_VDOT, OlympusRd_VDOT], (instregex "^(S|U|US)DOT(v8i8|v16i8)$")>; |
| // SDOT (by element) |
| // SUDOT (by element) |
| // UDOT (by element) |
| // USDOT (by element) |
| def : InstRW<[OlympusWr_VDOT, OlympusRd_VDOT], (instregex "^(S|U|SU|US)DOTlane(v8i8|v16i8)$")>; |
| // SMMLA (vector) |
| // UMMLA (vector) |
| // USMMLA (vector) |
| def : InstRW<[OlympusWr_VMMA, OlympusRd_VMMA], (instregex "^(U|S|US)MMLA$")>; |
| |
| // Arith reduce, 4H/4S |
| // Across-lanes reductions are grouped by source arrangement; each group |
| // (4H/4S, 8B/8H, 16B) is assigned its own write. |
| // ADDV |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^ADDV(v4i16|v4i32)v$")>; |
| // SADDLV |
| // UADDLV |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^[SU]ADDLV(v4i16|v4i32)v$")>; |
| // SMAXV |
| // UMAXV |
| // SMINV |
| // UMINV |
| def : InstRW<[OlympusWrite_3c_1V0123], (instregex "^[SU](MAX|MIN)V(v4i16|v4i32)v$")>; |
| |
| // Arith reduce, 8B/8H |
| // ADDV |
| def : InstRW<[OlympusWrite_5c_1V_1V0123], (instregex "^ADDV(v8i8|v8i16)v$")>; |
| // SADDLV |
| // UADDLV |
| def : InstRW<[OlympusWrite_5c_1V_1V0123], (instregex "^[SU]ADDLV(v8i8|v8i16)v$")>; |
| // SMAXV |
| // UMAXV |
| // SMINV |
| // UMINV |
| def : InstRW<[OlympusWrite_5c_1V_1V0123], (instregex "^[SU](MAX|MIN)V(v8i8|v8i16)v$")>; |
| |
| // Arith reduce, 16B |
| // ADDV |
| def : InstRW<[OlympusWrite_6c_2V0123], (instregex "^ADDVv16i8v$")>; |
| // SADDLV |
| // UADDLV |
| def : InstRW<[OlympusWrite_6c_2V0123], (instregex "^[SU]ADDLVv16i8v$")>; |
| // SMAXV |
| // UMAXV |
| // SMINV |
| // UMINV |
| // NOTE: instregex anchors at the start of the opcode name implicitly; the |
| // explicit '^' keeps this pattern in line with every other one in the group. |
| def : InstRW<[OlympusWrite_6c_2V0123], (instregex "^[SU](MAX|MIN)Vv16i8v$")>; |
| |
| // Polynomial multiply: PMUL, 8B and 16B |
| def : InstRW<[OlympusWrite_3c_1V], |
|     (instregex "^PMUL(v8i8|v16i8)$")>; |
| |
| // Polynomial multiply long: PMULL / PMULL2 from 8-bit elements |
| def : InstRW<[OlympusWrite_2c_1V], |
|     (instregex "^PMULL(v8i8|v16i8)$")>; |
| |
| // Unsigned reciprocal and reciprocal square root estimate |
| |
| // 2S arrangement: URECPE, URSQRTE |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|     (instrs URECPEv2i32, |
|             URSQRTEv2i32)>; |
| |
| // 4S arrangement: URECPE, URSQRTE |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|     (instrs URECPEv4i32, |
|             URSQRTEv4i32)>; |
| |
| // Table lookup (TBL), grouped by the number of table registers |
| |
| // One or two registers |
| def : InstRW<[OlympusWrite_2c_1V], |
|     (instrs TBLv8i8One, TBLv16i8One, |
|             TBLv8i8Two, TBLv16i8Two)>; |
| |
| // Three registers |
| def : InstRW<[OlympusWrite_4c_2V], |
|     (instrs TBLv8i8Three, TBLv16i8Three)>; |
| |
| // Four registers |
| def : InstRW<[OlympusWrite_4c_3V], |
|     (instrs TBLv8i8Four, TBLv16i8Four)>; |
| |
| // Table lookup extension (TBX), grouped by the number of table registers |
| |
| // One register |
| def : InstRW<[OlympusWrite_2c_1V], |
|     (instrs TBXv8i8One, TBXv16i8One)>; |
| |
| // Two registers |
| def : InstRW<[OlympusWrite_4c_2V], |
|     (instrs TBXv8i8Two, TBXv16i8Two)>; |
| |
| // Three registers |
| def : InstRW<[OlympusWrite_6c_3V], |
|     (instrs TBXv8i8Three, TBXv16i8Three)>; |
| |
| // Four registers |
| def : InstRW<[OlympusWrite_6c_5V], |
|     (instrs TBXv8i8Four, TBXv16i8Four)>; |
| |
| // Vector -> general register transfers. The lane-0 (_idx0) opcodes and the |
| // generic-lane opcodes are matched by separate patterns but share one write. |
| |
| // SMOV |
| def : InstRW<[OlympusWrite_3c_2V03], |
|     (instregex "^SMOV(vi8to32|vi8to64|vi16to32|vi16to64|vi32to64)_idx0$")>; |
| def : InstRW<[OlympusWrite_3c_2V03], |
|     (instregex "^SMOV(vi8to32|vi16to32|vi8to64|vi16to64|vi32to64)$")>; |
| |
| // UMOV |
| def : InstRW<[OlympusWrite_3c_2V03], |
|     (instregex "^UMOV(vi8|vi16|vi32|vi64)_idx0$")>; |
| def : InstRW<[OlympusWrite_3c_2V03], |
|     (instregex "^UMOV(vi8|vi16|vi32|vi64)$")>; |
| |
| // General -> vector register transfers |
| |
| // DUP (general): broadcast of a general register |
| def : InstRW<[OlympusWrite_3c_1M], |
|     (instregex "^DUP(v8i8|v4i16|v2i32|v16i8|v8i16|v4i32|v2i64)gpr$")>; |
| |
| // INS (general): insert of a general register into one element |
| def : InstRW<[OlympusWrite_5c_1M_1V], |
|     (instregex "^INS(vi8|vi16|vi32|vi64)gpr$")>; |
| |
| // 3.13 Cryptography extensions instructions |
| // ----------------------------------------------------------------------------- |
| |
| // AES, SHA3 operations |
| // AESD |
| // AESE |
| // AESIMC |
| // AESMC |
| def : InstRW<[OlympusWrite_2c_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr$")>; |
| // BCAX |
| // EOR3 |
| // RAX1 |
| // XAR |
| def : InstRW<[OlympusWrite_2c_1V], (instrs BCAX, EOR3, RAX1, XAR)>; |
| |
| // Polynomial Multiply Long (PMULLv1i64/PMULLv2i64 only; the v8i8/v16i8 PMULL opcodes are covered under 3.12) |
| def : InstRW<[OlympusWrite_2c_1V], (instrs PMULLv1i64, PMULLv2i64)>; |
| |
| // SHA general |
| // SHA1H |
| // SHA1SU0 |
| // SHA1SU1 |
| def : InstRW<[OlympusWrite_2c_1V0], (instrs SHA1Hrr, SHA1SU0rrr, SHA1SU1rr)>; |
| // SHA256SU0 |
| // SHA256SU1 |
| def : InstRW<[OlympusWrite_2c_1V0], (instrs SHA256SU0rr, SHA256SU1rrr)>; |
| // SHA512H2 |
| // SHA512H |
| // SHA512SU0 |
| // SHA512SU1 |
| def : InstRW<[OlympusWrite_2c_1V0], (instrs SHA512H2, SHA512H, SHA512SU0, SHA512SU1)>; |
| |
| // SHA hash acceleration (both patterns lack a trailing '$', so they are prefix matches) |
| // SHA1C |
| // SHA1M |
| // SHA1P |
| // SHA256H2 |
| // SHA256H |
| def : InstRW<[OlympusWrite_4c_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; |
| |
| // SM3 operations |
| def : InstRW<[OlympusWrite_2c_1V0123], (instregex "^SM3PARTW[12]$", "^SM3SS1$", |
| "^SM3TT[12][AB]$")>; |
| |
| // SM4 operations |
| def : InstRW<[OlympusWrite_4c_1V0], (instrs SM4E, SM4ENCKEY)>; |
| |
| // 3.14 FP load instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Load vector register, immediate (the pre/post-indexed opcodes list WriteAdr ahead of the load write) |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LDR[BHSDQ]ui$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LDR[BHSDQ](pre|post)$")>; |
| |
| // Load vector register, unscaled immediate |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LDUR[BHSDQ]i$")>; |
| |
| // Load vector register, register (ReadAdrBase is attached as the read for the register-offset forms) |
| def : InstRW<[OlympusWrite_7c_1I_1L, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>; |
| |
| // Load vector register, literal |
| def : InstRW<[OlympusWrite_7c_1I_1L], (instregex "^LDR[SDQ]l$")>; |
| |
| // Load vector pair, 128b (one OlympusWrite_6c_1L per destination register) |
| // Load vector pair non-temporal, 128b |
| def : InstRW<[OlympusWrite_6c_1L, OlympusWrite_6c_1L], (instrs LDPQi, LDNPQi)>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L, OlympusWrite_6c_1L], (instregex "^LDPQ(pre|post)$")>; |
| |
| // Load vector pair, 32b/64b (the second destination uses OlympusWrite_6c, defined outside this section) |
| // Load vector pair non-temporal, 32b/64b (LDNP is matched by the immediate-offset pattern only) |
| def : InstRW<[OlympusWrite_6c_1L, OlympusWrite_6c], (instregex "^LDN?P[SD]i$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L, OlympusWrite_6c], (instregex "^LDP[SD](pre|post)$")>; |
| |
| // 3.15 FP store instructions |
| // ----------------------------------------------------------------------------- |
| // Pre/post-indexed opcodes list WriteAdr ahead of the store write; the |
| // register-offset forms additionally attach ReadAdrBase. |
| |
| // Store vector register, immediate |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^STR[BHSDQ]ui$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123], (instregex "^STR[BHSDQ](pre|post)$")>; |
| |
| // Store vector register, unscaled immediate |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^STUR[BHSDQ]i$")>; |
| |
| // Store vector register, register |
| def : InstRW<[OlympusWrite_3c_1I_1SA_1V0123, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]$")>; |
| |
| // Store vector pair, 128 bit |
| // Store vector pair non-temporal, 128 bit |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123, OlympusWrite_3c_1SA_1V0123], (instrs STPQi, STNPQi)>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123, OlympusWrite_3c_1SA_1V0123], (instregex "^STPQ(pre|post)$")>; |
| |
| // Store vector pair, 32/64 bit |
| // Store vector pair non-temporal, 32/64 bit |
| // NOTE: STNP exists only in the signed-offset ("i") form, so the optional 'N' |
| // belongs on the immediate pattern (covering STNPSi/STNPDi); the pre/post |
| // pattern matches STP only. |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^STN?P[SD]i$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123], (instregex "^STP[SD](pre|post)$")>; |
| |
| // 3.16 SIMD load instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Load 1 element, multiple, 1 register (D and Q forms use the same write; each _POST opcode adds WriteAdr in front) |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 1 element, multiple, 2 registers, D form |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; |
| |
| // Load 1 element, multiple, 2 registers, Q form |
| def : InstRW<[OlympusWrite_6c_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_2L], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 1 element, multiple, 3 registers, D form |
| def : InstRW<[OlympusWrite_6c_2L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_2L], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; |
| |
| // Load 1 element, multiple, 3 registers, Q form |
| def : InstRW<[OlympusWrite_6c_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_3L], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 1 element, multiple, 4 registers, D form |
| def : InstRW<[OlympusWrite_6c_2L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_2L], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; |
| |
| // Load 1 element, multiple, 4 registers, Q form |
| def : InstRW<[OlympusWrite_6c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 1 element, one lane |
| def : InstRW<[OlympusWrite_8c_1L_1V], (instregex "^LD1i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>; |
| |
| // Load 1 element, all lanes (D and Q forms use the same write) |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LD1Rv(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[OlympusWrite_6c_1L], (instregex "^LD1Rv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_1L], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 2 element, multiple, D form |
| def : InstRW<[OlympusWrite_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>; |
| |
| // Load 2 element, multiple, Q form |
| def : InstRW<[OlympusWrite_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 2 element, one lane |
| def : InstRW<[OlympusWrite_8c_1L_2V], (instregex "^LD2i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>; |
| |
| // Load 2 element, all lanes (D and Q forms use the same write) |
| def : InstRW<[OlympusWrite_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[OlympusWrite_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 3 element, multiple, D form |
| def : InstRW<[OlympusWrite_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>; |
| |
| // Load 3 element, multiple, Q form |
| def : InstRW<[OlympusWrite_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 3 element, one lane |
| def : InstRW<[OlympusWrite_8c_2L_3V], (instregex "^LD3i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>; |
| |
| // Load 3 element, all lanes (D and Q forms use the same write) |
| def : InstRW<[OlympusWrite_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[OlympusWrite_8c_2L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 4 element, multiple, D form |
| def : InstRW<[OlympusWrite_8c_2L_4V], (instregex "^LD4Fourv(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>; |
| |
| // Load 4 element, multiple, Q form |
| def : InstRW<[OlympusWrite_9c_4L_8V], (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_9c_4L_8V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; |
| |
| // Load 4 element, one lane, B/H/S |
| def : InstRW<[OlympusWrite_8c_1L_4V], (instregex "^LD4i(8|16|32)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_4V], (instregex "^LD4i(8|16|32)_POST$")>; |
| |
| // Load 4 element, one lane, D |
| def : InstRW<[OlympusWrite_8c_2L_4V], (instrs LD4i64)>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_4V], (instrs LD4i64_POST)>; |
| |
| // Load 4 element, all lanes, B/H/S (D and Q forms use the same write) |
| def : InstRW<[OlympusWrite_8c_1L_4V], (instregex "^LD4Rv(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_4V], (instregex "^LD4Rv(8b|4h|2s)_POST$")>; |
| def : InstRW<[OlympusWrite_8c_1L_4V], (instregex "^LD4Rv(16b|8h|4s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_1L_4V], (instregex "^LD4Rv(16b|8h|4s)_POST$")>; |
| |
| // Load 4 element, all lanes, D (the [12]d class covers both the 1d and 2d opcodes) |
| def : InstRW<[OlympusWrite_8c_2L_4V], (instregex "^LD4Rv[12]d$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_2L_4V], (instregex "^LD4Rv[12]d_POST$")>; |
| |
| // 3.17 SIMD store instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Store 1 element, multiple, 1 register (D and Q forms use the same write; each _POST opcode adds WriteAdr in front) |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Onev(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>; |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Onev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 1 element, multiple, 2 registers, D form |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Twov(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_1SA_1V0123], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>; |
| |
| // Store 1 element, multiple, 2 registers, Q form |
| def : InstRW<[OlympusWrite_3c_2SA_2V0123], (instregex "^ST1Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_3c_2SA_2V0123], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 1 element, multiple, 3 registers, D form |
| def : InstRW<[OlympusWrite_4c_2SA_2V0123], (instregex "^ST1Threev(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_2SA_2V0123], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>; |
| |
| // Store 1 element, multiple, 3 registers, Q form |
| def : InstRW<[OlympusWrite_4c_3SA_3V0123], (instregex "^ST1Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_3SA_3V0123], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 1 element, multiple, 4 registers, D form |
| def : InstRW<[OlympusWrite_4c_2SA_2V0123], (instregex "^ST1Fourv(8b|4h|2s|1d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_2SA_2V0123], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>; |
| |
| // Store 1 element, multiple, 4 registers, Q form |
| def : InstRW<[OlympusWrite_4c_4SA_4V0123], (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_4c_4SA_4V0123], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 1 element, one lane |
| def : InstRW<[OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST1i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST1i(8|16|32|64)_POST$")>; |
| |
| // Store 2 element, multiple, D form |
| def : InstRW<[OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST2Twov(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST2Twov(8b|4h|2s)_POST$")>; |
| |
| // Store 2 element, multiple, Q form |
| def : InstRW<[OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST2Twov(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 2 element, one lane |
| def : InstRW<[OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST2i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_1SA_1V_1V0123], (instregex "^ST2i(8|16|32|64)_POST$")>; |
| |
| // Store 3 element, multiple, D form |
| def : InstRW<[OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST3Threev(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST3Threev(8b|4h|2s)_POST$")>; |
| |
| // Store 3 element, multiple, Q form |
| def : InstRW<[OlympusWrite_6c_3SA_3V_3V0123], (instregex "^ST3Threev(16b|8h|4s|2d)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_3SA_3V_3V0123], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; |
| |
| // Store 3 element, one lane |
| def : InstRW<[OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST3i(8|16|32|64)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_2SA_2V_2V0123], (instregex "^ST3i(8|16|32|64)_POST$")>; |
| |
| // Store 4 element, multiple, D form |
| def : InstRW<[OlympusWrite_7c_2SA_4V_2V0123], (instregex "^ST4Fourv(8b|4h|2s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_7c_2SA_4V_2V0123], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; |
| |
| // Store 4 element, multiple, Q form, D element (ST4Fourv2d is split out from the other Q-form arrangements below) |
| def : InstRW<[OlympusWrite_6c_4SA_4V_4V0123], (instrs ST4Fourv2d)>; |
| def : InstRW<[WriteAdr, OlympusWrite_6c_4SA_4V_4V0123], (instrs ST4Fourv2d_POST)>; |
| |
| // Store 4 element, multiple, Q form, B/H/S element |
| def : InstRW<[OlympusWrite_8c_4SA_8V_4V0123], (instregex "^ST4Fourv(16b|8h|4s)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_8c_4SA_8V_4V0123], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; |
| |
| // Store 4 element, one lane, D |
| def : InstRW<[OlympusWrite_5c_2SA_2V_2V0123], (instrs ST4i64)>; |
| def : InstRW<[WriteAdr, OlympusWrite_5c_2SA_2V_2V0123], (instrs ST4i64_POST)>; |
| |
| // Store 4 element, one lane, B/H/S |
| def : InstRW<[OlympusWrite_7c_1SA_2V_1V0123], (instregex "^ST4i(8|16|32)$")>; |
| def : InstRW<[WriteAdr, OlympusWrite_7c_1SA_2V_1V0123], (instregex "^ST4i(8|16|32)_POST$")>; |
| |
| // 3.18 SVE predicate instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Predicate instructions that all map to OlympusWrite_1c_1M. Regex-matched |
| // and explicitly named instructions are kept in separate records. |
| def : InstRW<[OlympusWrite_1c_1M], |
|              (instregex |
|                // Predicate logical |
|                // Predicate logical, flag setting |
|                "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S?_PPzPP", |
|                // Loop terminate |
|                "^CTERM(EQ|NE)_(WW|XX)")>; |
| def : InstRW<[OlympusWrite_1c_1M], |
|              (instrs |
|                // Predicate test |
|                PTEST_PP, PTEST_PP_ANY, PTEST_PP_FIRST, |
|                // Predicate select |
|                SEL_PPPP)>; |
| |
| // Control, counting |
| // Everything in this group maps to OlympusWrite_2c_1M. |
| def : InstRW<[OlympusWrite_2c_1M], |
|              (instrs |
|                // Predicate counting scalar |
|                ADDPL_XXI, ADDVL_XXI, RDVLI_XI, |
|                // Loop control, based on predicate |
|                BRKA_PPmP, BRKA_PPzP, BRKB_PPmP, BRKB_PPzP, |
|                // Loop control, based on predicate and flag setting |
|                BRKAS_PPzP, BRKBS_PPzP, |
|                // Loop control, propagating |
|                BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP, |
|                // Loop control, propagating and flag setting |
|                BRKNS_PPzP, BRKPAS_PPzPP, BRKPBS_PPzPP)>; |
| def : InstRW<[OlympusWrite_2c_1M], |
|              (instregex |
|                // Predicate counting scalar |
|                "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI", |
|                "^SQ(DEC|INC)[BHWD]_XPiWdI", |
|                "^UQ(DEC|INC)[BHWD]_WPiI", |
|                // Predicate counting scalar, active predicate |
|                "^CNTP_XPP_[BHSD]", |
|                "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", |
|                "^(UQDEC|UQINC)P_WP_[BHSD]", |
|                "^(SQDEC|SQINC)P_XPWd_[BHSD]", |
|                // Predicate find first/next |
|                "^PFIRST_B", "^PNEXT_[BHSD]", |
|                // Loop control, based on GPR |
|                "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]", |
|                "^WHILE(RW|WR)_PXX_[BHSD]")>; |
| |
| // Misc processing |
| // Everything in this group maps to OlympusWrite_2c_1M. |
| def : InstRW<[OlympusWrite_2c_1M], |
|              (instregex |
|                // Predicate set |
|                "^PFALSE", "^PTRUE_[BHSD]", |
|                // Predicate set/initialize, set flags |
|                "^PTRUES_[BHSD]", |
|                // Predicate reverse |
|                "^REV_PP_[BHSD]", |
|                // Predicate transpose |
|                "^TRN[12]_PPP_[BHSD]", |
|                // Predicate zip/unzip |
|                "^(ZIP|UZP)[12]_PPP_[BHSD]")>; |
| // Predicate unpack and widen |
| def : InstRW<[OlympusWrite_2c_1M], |
|              (instrs PUNPKHI_PP, PUNPKLO_PP)>; |
| |
| // Predicate counting scalar, ALL #{1,2,4,8} |
| // Predicate counting scalar, other |
| // Both rows are covered by the single OlympusWrite_IncDec write. |
| def : InstRW<[OlympusWrite_IncDec], |
|              (instregex "^(DEC|INC)[BHWD]_XPiI")>; |
|
| // Predicate counting vector |
| def : InstRW<[OlympusWrite_7c_2M_1V], |
|              (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]")>; |
|
| // Read first fault register, unpredicated |
| def : InstRW<[OlympusWrite_2c_1M0], |
|              (instrs RDFFR_P)>; |
|
| // Read first fault register, predicated |
| // Read first fault register and set flags |
| def : InstRW<[OlympusWrite_3c_1M_1M0], |
|              (instrs RDFFR_PPz, RDFFRS_PPz)>; |
|
| // Write/Set first fault register |
| def : InstRW<[OlympusWrite_1c_1M0], |
|              (instrs WRFFR, SETFFR)>; |
| |
| // 3.19 SVE floating-point instructions |
| // ----------------------------------------------------------------------------- |
| |
| // FP arithmetic |
| // All of these map to OlympusWrite_2c_1V; one pattern per instruction form. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^FABD_ZP[mZ]Z_[HSD]",    // FABD |
|                "^FABS_ZPmZ_[HSD]",       // FABS |
|                "^FADD_ZP[mZ]I_[HSD]",    // FADD (immediate) |
|                "^FADD_ZP[mZ]Z_[HSD]",    // FADD (vectors, predicated) |
|                "^FADDP_ZPmZZ_[HSD]",     // FADDP |
|                "^FADD_ZZZ_[HSD]",        // FADD (vectors, unpredicated) |
|                "^FAMAX_ZP[mZ]Z_[HSD]",   // FAMAX |
|                "^FMAX_ZP[mZ]I_[HSD]",    // FMAX (immediate) |
|                "^FMAX_ZP[mZ]Z_[HSD]",    // FMAX (vectors) |
|                "^FMAXNM_ZP[mZ]I_[HSD]",  // FMAXNM (immediate) |
|                "^FMAXNM_ZP[mZ]Z_[HSD]",  // FMAXNM (vectors) |
|                "^FMAXNMP_ZPmZZ_[HSD]",   // FMAXNMP |
|                "^FMAXP_ZPmZZ_[HSD]",     // FMAXP |
|                "^FAMIN_ZP[mZ]Z_[HSD]",   // FAMIN |
|                "^FMIN_ZP[mZ]I_[HSD]",    // FMIN (immediate) |
|                "^FMIN_ZP[mZ]Z_[HSD]",    // FMIN (vectors) |
|                "^FMINNM_ZP[mZ]I_[HSD]",  // FMINNM (immediate) |
|                "^FMINNM_ZP[mZ]Z_[HSD]",  // FMINNM (vectors) |
|                "^FMINNMP_ZPmZZ_[HSD]",   // FMINNMP |
|                "^FMINP_ZPmZZ_[HSD]",     // FMINP |
|                "^FCPY_ZPmI_[HSD]",       // FCPY |
|                "^FDUP_ZI_[HSD]",         // FDUP |
|                "^FNEG_ZPmZ_[HSD]",       // FNEG |
|                "^FSUB_ZP[mZ]I_[HSD]",    // FSUB (immediate) |
|                "^FSUB_ZP[mZ]Z_[HSD]",    // FSUB (vectors, predicated) |
|                "^FSUBR_ZP[mZ]I_[HSD]",   // FSUBR (immediate) |
|                "^FSUBR_ZP[mZ]Z_[HSD]",   // FSUBR (vectors) |
|                "^FSUB_ZZZ_[HSD]")>;      // FSUB (vectors, unpredicated) |
| |
| // FP associative add: one write per element size. |
| // F16 |
| def : InstRW<[OlympusWrite_11c_1V03_10], |
|              (instrs FADDA_VPZ_H)>; |
| // F32 |
| def : InstRW<[OlympusWrite_7c_1V03_6], |
|              (instrs FADDA_VPZ_S)>; |
| // F64 |
| def : InstRW<[OlympusWrite_4c_2V], |
|              (instrs FADDA_VPZ_D)>; |
|
| // FP compare |
| def : InstRW<[OlympusWrite_2c_1V03], |
|              (instregex |
|                // FAC<cc> |
|                "^FACG[ET]_PPzZZ_[HSD]", |
|                // FCM<cc> (zero) |
|                "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]", |
|                // FCM<cc> (vectors) |
|                "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]")>; |
| |
| // FP multiply |
| // Named write (OlympusUnitV, Latency 3). It is named rather than anonymous |
| // because OlympusRd_ZFMA lists it as a valid producer. |
| def OlympusWr_ZFMul : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 3; |
| } |
| def : InstRW<[OlympusWr_ZFMul], |
|              (instregex |
|                "^FMUL_ZP[mZ]I_[HSD]",    // FMUL (immediate) |
|                "^FMUL_ZP[mZ]Z_[HSD]",    // FMUL (vectors, predicated) |
|                "^FMULX_ZP[mZ]Z_[HSD]",   // FMULX |
|                "^FMUL_ZZZ_[HSD]",        // FMUL (vectors, unpredicated) |
|                "^FMUL_ZZZI_[HSD]")>;     // FMUL (indexed) |
|
| // FP misc |
| def : InstRW<[OlympusWrite_3c_1V], |
|              (instregex |
|                "^FCADD_ZPmZ_[HSD]",      // FCADD |
|                "^FSCALE_ZPmZ_[HSD]",     // FSCALE |
|                "^FTSMUL_ZZZ_[HSD]",      // FTSMUL |
|                "^FTSSEL_ZZZ_[HSD]")>;    // FTSSEL |
| |
| // FP multiply accumulate |
| // OlympusWr_ZFMA: OlympusUnitV, Latency 4. |
| def OlympusWr_ZFMA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 4; |
| } |
| // Read advance that applies only when the producer is OlympusWr_ZFMul or |
| // OlympusWr_ZFMA. Base cycles are 0 and the third argument is [1, 2]. |
| // NOTE(review): presumably one per-write adjustment for each listed write, |
| // in order - confirm against SchedReadAdvance in TargetSchedule.td. |
| def OlympusRd_ZFMA |
|     : SchedReadAdvance<0, [OlympusWr_ZFMul, OlympusWr_ZFMA], [1, 2]>; |
|
| // Forms whose read list is ReadDefault followed by OlympusRd_ZFMA. |
| def : InstRW<[OlympusWr_ZFMA, ReadDefault, OlympusRd_ZFMA], |
|              (instregex |
|                "^FMLA_ZP[mZ]ZZ_[HSD]",   // FMLA (vectors) |
|                "^FMLS_ZP[mZ]ZZ_[HSD]",   // FMLS (vectors) |
|                "^FNMLA_ZP[mZ]ZZ_[HSD]",  // FNMLA |
|                "^FNMLS_ZP[mZ]ZZ_[HSD]",  // FNMLS |
|                "^FMAD_ZPmZZ_[HSD]",      // FMAD |
|                "^FNMAD_ZPmZZ_[HSD]",     // FNMAD |
|                "^FMSB_ZPmZZ_[HSD]",      // FMSB |
|                "^FNMSB_ZPmZZ_[HSD]")>;   // FNMSB |
|
| // Forms whose read list is OlympusRd_ZFMA alone. |
| def : InstRW<[OlympusWr_ZFMA, OlympusRd_ZFMA], |
|              (instregex |
|                "^FMLA_ZZZI_[HSD]",       // FMLA (indexed) |
|                "^FMLS_ZZZI_[HSD]",       // FMLS (indexed) |
|                "^FTMAD_ZZI_[HSD]")>;     // FTMAD |
| def : InstRW<[OlympusWr_ZFMA, OlympusRd_ZFMA], |
|              (instrs |
|                FMLALB_ZZZ_SHH,           // FMLALB (vectors, FP16 to FP32) |
|                FMLALB_ZZZI_SHH,          // FMLALB (indexed, FP16 to FP32) |
|                FMLALT_ZZZ_SHH,           // FMLALT (vectors, FP16 to FP32) |
|                FMLALT_ZZZI_SHH,          // FMLALT (indexed, FP16 to FP32) |
|                FMLSLB_ZZZ_SHH,           // FMLSLB (vectors) |
|                FMLSLB_ZZZI_SHH,          // FMLSLB (indexed) |
|                FMLSLT_ZZZ_SHH,           // FMLSLT (vectors) |
|                FMLSLT_ZZZI_SHH)>;        // FMLSLT (indexed) |
|
| // FRECPS and FRSQRTS use OlympusWrite_4c_1V with no read advance. |
| def : InstRW<[OlympusWrite_4c_1V], |
|              (instregex |
|                "^FRECPS_ZZZ_[HSD]",      // FRECPS |
|                "^FRSQRTS_ZZZ_[HSD]")>;   // FRSQRTS |
| |
| // FP complex multiply accumulate |
| // OlympusWr_ZFCMA: OlympusUnitV, Latency 5. |
| def OlympusWr_ZFCMA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 5; |
| } |
| // Read advance of 3 cycles, valid only for OlympusWr_ZFCMA producers. |
| def OlympusRd_ZFCMA : SchedReadAdvance<3, [OlympusWr_ZFCMA]>; |
| // FCMLA (vectors) |
| def : InstRW<[OlympusWr_ZFCMA, ReadDefault, OlympusRd_ZFCMA], |
|              (instregex "^FCMLA_ZPmZZ_[HSD]")>; |
| // FCMLA (indexed) |
| def : InstRW<[OlympusWr_ZFCMA, OlympusRd_ZFCMA], |
|              (instregex "^FCMLA_ZZZI_[HS]")>; |
| |
| // FP convert, to/from F64 |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", |
|                         "^FCVTLT_ZPmZ_StoD", |
|                         "^FCVTNT_ZPmZ_DtoS")>; |
| // FCVTX, FCVTXNT |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; |
|
| // FP convert, F32 to/from F16 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex "^FCVT_ZPmZ_(HtoS|StoH)", |
|                         "^FCVTLT_ZPmZ_HtoS", |
|                         "^FCVTNT_ZPmZ_StoH")>; |
| |
| // FP logarithm, estimates, round, int convert, F16 |
| def : InstRW<[OlympusWrite_6c_1V0123_4], |
|              (instregex |
|                "^FCVTZS_ZPmZ_HtoH",            // FCVTZS |
|                "^FCVTZU_ZPmZ_HtoH",            // FCVTZU |
|                "^FLOGB_ZP[mZ]Z_H",             // FLOGB |
|                "^FRECPX_ZPmZ_H",               // FRECPX |
|                "^FRINT[AIMNPXZ]_ZPmZ_H")>;     // FRINT<r> |
| def : InstRW<[OlympusWrite_6c_1V0123_4], |
|              (instrs FRECPE_ZZ_H,              // FRECPE |
|                      FRSQRTE_ZZ_H)>;           // FRSQRTE |
|
| // FP logarithm, estimates, round, int convert, F32 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex |
|                "^FCVTZS_ZPmZ_(HtoS|StoS)",     // FCVTZS |
|                "^FCVTZU_ZPmZ_(HtoS|StoS)",     // FCVTZU |
|                "^FLOGB_ZP[mZ]Z_S",             // FLOGB |
|                "^FRECPX_ZPmZ_S",               // FRECPX |
|                "^FRINT[AIMNPXZ]_ZPmZ_S")>;     // FRINT<r> |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instrs FRECPE_ZZ_S,              // FRECPE |
|                      FRSQRTE_ZZ_S)>;           // FRSQRTE |
|
| // FP logarithm, estimates, round, int convert, F64 |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instregex |
|                "^FCVTZS_ZPmZ_(HtoD|StoD|DtoS|DtoD)",  // FCVTZS |
|                "^FCVTZU_ZPmZ_(HtoD|StoD|DtoS|DtoD)",  // FCVTZU |
|                "^FLOGB_ZP[mZ]Z_D",                    // FLOGB |
|                "^FRECPX_ZPmZ_D",                      // FRECPX |
|                "^FRINT[AIMNPXZ]_ZPmZ_D")>;            // FRINT<r> |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instrs FRECPE_ZZ_D,              // FRECPE |
|                      FRSQRTE_ZZ_D)>;           // FRSQRTE |
| |
| // FP exponent |
| def : InstRW<[OlympusWrite_3c_1V12], |
|              (instregex "^FEXPA_ZZ_[HSD]")>; |
|
| // FP divide, sqrt: one write per element size. |
| // F16 |
| def : InstRW<[OlympusWrite_13c_1V12_8], |
|              (instregex "^FDIVR?_ZP[mZ]Z_H", |
|                         "^FSQRT_ZPmZ_H")>; |
| // F32 |
| def : InstRW<[OlympusWrite_11c_1V12_4], |
|              (instregex "^FDIVR?_ZP[mZ]Z_S", |
|                         "^FSQRT_ZPmZ_S")>; |
| // F64 |
| def : InstRW<[OlympusWrite_14c_1V12_2], |
|              (instregex "^FDIVR?_ZP[mZ]Z_D", |
|                         "^FSQRT_ZPmZ_D")>; |
|
| // FP reduction: one write per element size. |
| // F16 |
| def : InstRW<[OlympusWrite_8c_4V], |
|              (instregex "^(FADD|FMAX(NM)?|FMIN(NM)?)V_VPZ_H")>; |
| // F32 |
| def : InstRW<[OlympusWrite_6c_3V], |
|              (instregex "^(FADD|FMAX(NM)?|FMIN(NM)?)V_VPZ_S")>; |
| // F64 |
| def : InstRW<[OlympusWrite_4c_2V], |
|              (instregex "^(FADD|FMAX(NM)?|FMIN(NM)?)V_VPZ_D")>; |
| |
| // 3.20 SVE FP8 instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Convert to/from F16 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex |
|                "^F[12]CVT_ZZ_BtoH",      // F1CVT, F2CVT |
|                "^F[12]CVTLT_ZZ_BtoH")>;  // F1CVTLT, F2CVTLT |
| // FCVTN |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instrs FCVTN_Z2Z_HtoB)>; |
|
| // Convert from F32 |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instrs FCVTNB_Z2Z_StoB,    // FCVTNB |
|                      FCVTNT_Z2Z_StoB)>;  // FCVTNT (unpredicated) |
| |
| // Dot product, multiply accumulate |
| // OlympusWr_ZF8MA: OlympusUnitV, Latency 4. |
| def OlympusWr_ZF8MA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 4; |
| } |
| // Read advance of 2 cycles, valid only for OlympusWr_ZF8MA producers. |
| def OlympusRd_ZF8MA : SchedReadAdvance<2, [OlympusWr_ZF8MA]>; |
| def : InstRW<[OlympusWr_ZF8MA, OlympusRd_ZF8MA], |
|              (instrs |
|                FDOT_ZZZ_BtoS,     // FDOT (4-way, vectors) |
|                FDOT_ZZZI_BtoS,    // FDOT (4-way, indexed) |
|                FDOT_ZZZ_BtoH,     // FDOT (2-way, vectors, FP8 to FP16) |
|                FDOT_ZZZI_BtoH,    // FDOT (2-way, indexed, FP8 to FP16) |
|                FMLALB_ZZZ,        // FMLALB (vectors, FP8 to FP16) |
|                FMLALB_ZZZI,       // FMLALB (indexed, FP8 to FP16) |
|                FMLALLBB_ZZZ,      // FMLALLBB (vectors) |
|                FMLALLBB_ZZZI,     // FMLALLBB (indexed) |
|                FMLALLBT_ZZZ,      // FMLALLBT (vectors) |
|                FMLALLBT_ZZZI,     // FMLALLBT (indexed) |
|                FMLALLTB_ZZZ,      // FMLALLTB (vectors) |
|                FMLALLTB_ZZZI,     // FMLALLTB (indexed) |
|                FMLALLTT_ZZZ,      // FMLALLTT (vectors) |
|                FMLALLTT_ZZZI,     // FMLALLTT (indexed) |
|                FMLALT_ZZZ,        // FMLALT (vectors, FP8 to FP16) |
|                FMLALT_ZZZI)>;     // FMLALT (indexed, FP8 to FP16) |
| |
| // 3.21 SVE BF16 instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Convert to/from F8 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex |
|                "^BF[12]CVT_ZZ_BtoH",      // BF1CVT, BF2CVT |
|                "^BF[12]CVTLT_ZZ_BtoH")>;  // BF1CVTLT, BF2CVTLT |
| // BFCVTN |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instrs BFCVTN_Z2Z_HtoB)>; |
|
| // Convert from F32 |
| def : InstRW<[OlympusWrite_4c_1V0123], |
|              (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; |
|
| // The OlympusWr_ZBF* / OlympusRd_ZBF* records used below are defined outside |
| // this part of the file. |
| // Multiply accumulate |
| def : InstRW<[OlympusWr_ZBFMAL, OlympusRd_ZBFMAL], |
|              (instregex "^BFMLAL[BT]_ZZZI?")>; |
|
| // Dot product |
| def : InstRW<[OlympusWr_ZBFDOT, OlympusRd_ZBFDOT], |
|              (instrs BFDOT_ZZI, BFDOT_ZZZ)>; |
|
| // Matrix multiply accumulate |
| def : InstRW<[OlympusWr_ZBFMMA, OlympusRd_ZBFMMA], |
|              (instrs BFMMLA_ZZZ_HtoS)>; |
| |
| // 3.22 SVE integer instructions |
| // ----------------------------------------------------------------------------- |
| |
| // General |
| // ABS through BSL; all map to OlympusWrite_2c_1V. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^ABS_ZPmZ_[BHSD]",           // ABS |
|                "^ADCLB_ZZZ_[SD]",            // ADCLB |
|                "^ADCLT_ZZZ_[SD]",            // ADCLT |
|                "^ADD_ZP[mZ]Z_[BHSD]",        // ADD (vectors, predicated) |
|                "^ADDP_ZPmZ_[BHSD]",          // ADDP |
|                "^ADD_ZI_[BHSD]",             // ADD (immediate) |
|                "^ADD_ZZZ_[BHSD]",            // ADD (vectors, unpredicated) |
|                "^CADD_ZZI_[BHSD]",           // CADD |
|                "^ADDHNB_ZZZ_[BHS]",          // ADDHNB |
|                "^ADDHNT_ZZZ_[BHS]",          // ADDHNT |
|                "^RADDHNB_ZZZ_[BHS]",         // RADDHNB |
|                "^RADDHNT_ZZZ_[BHS]",         // RADDHNT |
|                "^ADR_LSL_ZZZ_[SD]_[0123]",   // ADR |
|                "^ADR_[SU]XTW_ZZZ_D_[0123]",  // ADR |
|                "^AND_ZP[mZ]Z_[BHSD]",        // AND (vectors, predicated) |
|                "^AND_ZI",                    // AND (immediate) |
|                "^ASR_ZP[mZ]I_[BHSD]",        // ASR (immediate, predicated) |
|                "^ASR_WIDE_ZPmZ_[BHS]",       // ASR (wide elements, predicated) |
|                "^ASR_ZP[mZ]Z_[BHSD]",        // ASR (vectors) |
|                "^ASRR_ZPmZ_[BHSD]",          // ASRR |
|                "^ASR_ZZI_[BHSD]",            // ASR (immediate, unpredicated) |
|                "^ASR_WIDE_ZZZ_[BHS]",        // ASR (wide elements, unpredicated) |
|                "^BIC_ZP[mZ]Z_[BHSD]")>;      // BIC (vectors, predicated) |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs |
|                AND_ZZZ,                      // AND (vectors, unpredicated) |
|                BIC_ZZZ,                      // BIC (vectors, unpredicated) |
|                BSL1N_ZZZZ,                   // BSL1N |
|                BSL2N_ZZZZ, EON_ZZZ,          // BSL2N |
|                BSL_ZZZZ)>;                   // BSL |
| // CLS through INSR; all map to OlympusWrite_2c_1V. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^CLS_ZPmZ_[BHSD]",       // CLS |
|                "^CLZ_ZPmZ_[BHSD]",       // CLZ |
|                "^CNOT_ZPmZ_[BHSD]",      // CNOT |
|                "^CNT_ZPmZ_[BHSD]",       // CNT |
|                "^CPY_ZPzI_[BHSD]",       // CPY (immediate, zeroing) |
|                "^CPY_ZPmI_[BHSD]",       // CPY (immediate, merging) |
|                "^CPY_ZPmV_[BHSD]",       // CPY (SIMD&FP scalar) |
|                "^DEC[HWD]_ZPiI",         // DECD, DECH, DECW (vector) |
|                "^DUP_ZI_[BHSD]",         // DUP (immediate) |
|                "^DUP_ZZI_[BHSDQ]",       // DUP (indexed) |
|                "^EOR_ZP[mZ]Z_[BHSD]",    // EOR (vectors, predicated) |
|                "^EOR_ZI",                // EOR (immediate) |
|                "^EORBT_ZZZ_[BHSD]",      // EORBT |
|                "^EORTB_ZZZ_[BHSD]",      // EORTB |
|                "^EXT_ZZI",               // EXT |
|                "^HISTCNT_ZPzZZ_[SD]",    // HISTCNT |
|                "^INC[HWD]_ZPiI",         // INCD, INCH, INCW (vector) |
|                "^INSR_ZV_[BHSD]")>;      // INSR (SIMD&FP scalar) |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs |
|                DUPM_ZI,                  // DUPM |
|                EOR_ZZZ,                  // EOR (vectors, unpredicated) |
|                HISTSEG_ZZZ)>;            // HISTSEG |
| // LSL through ORR; all map to OlympusWrite_2c_1V. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^LSL_ZP[mZ]I_[BHSD]",      // LSL (immediate, predicated) |
|                "^LSL_WIDE_ZPmZ_[BHS]",     // LSL (wide elements, predicated) |
|                "^LSL_ZP[mZ]Z_[BHSD]",      // LSL (vectors) |
|                "^LSLR_ZPmZ_[BHSD]",        // LSLR |
|                "^LSR_ZP[mZ]I_[BHSD]",      // LSR (immediate, predicated) |
|                "^LSR_WIDE_ZPmZ_[BHS]",     // LSR (wide elements, predicated) |
|                "^LSR_ZP[mZ]Z_[BHSD]",      // LSR (vectors) |
|                "^LSRR_ZPmZ_[BHSD]",        // LSRR |
|                "^LSL_ZZI_[BHSD]",          // LSL (immediate, unpredicated) |
|                "^LSL_WIDE_ZZZ_[BHS]",      // LSL (wide elements, unpredicated) |
|                "^LSR_ZZI_[BHSD]",          // LSR (immediate, unpredicated) |
|                "^LSR_WIDE_ZZZ_[BHS]",      // LSR (wide elements, unpredicated) |
|                "^LUTI2_ZZZI_[BH]",         // LUTI2 |
|                "^LUTI4_ZZZI_[BH]",         // LUTI4 |
|                "^LUTI4_Z2ZZI",             // LUTI4 |
|                "^MOVPRFX_ZP[mz]Z_[BHSD]",  // MOVPRFX (predicated) |
|                "^NEG_ZPmZ_[BHSD]",         // NEG |
|                "^NOT_ZPmZ_[BHSD]",         // NOT (vector) |
|                "^ORR_ZP[mZ]Z_[BHSD]",      // ORR (vectors, predicated) |
|                "^ORR_ZI")>;                // ORR (immediate) |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs |
|                MOVPRFX_ZZ,                    // MOVPRFX (unpredicated) |
|                NBSL_ZZZZ, NAND_ZZZ, NOR_ZZZ,  // NBSL |
|                ORR_ZZZ)>;                     // ORR (vectors, unpredicated) |
| // PMUL through ZIP; all map to OlympusWrite_2c_1V. |
| // PMUL |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs PMUL_ZZZ_B)>; |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^PMULLB_ZZZ_[HDQ]",      // PMULLB |
|                "^PMULLT_ZZZ_[HDQ]",      // PMULLT |
|                "^RBIT_ZPmZ_[BHSD]",      // RBIT |
|                "^REV_ZZ_[BHSD]",         // REV (vector) |
|                "^REVB_ZPmZ_[HSD]",       // REVB |
|                "^REVH_ZPmZ_[SD]",        // REVH |
|                "^REVW_ZPmZ_D",           // REVW |
|                "^SBCLB_ZZZ_[SD]",        // SBCLB |
|                "^SBCLT_ZZZ_[SD]",        // SBCLT |
|                "^SEL_ZPZZ_[BHSD]",       // SEL (vectors) |
|                "^SHRNB_ZZI_[BHS]",       // SHRNB |
|                "^SHRNT_ZZI_[BHS]",       // SHRNT |
|                "^SLI_ZZI_[BHSD]",        // SLI |
|                "^SRI_ZZI_[BHSD]",        // SRI |
|                "^SUB_ZP[mZ]Z_[BHSD]",    // SUB (vectors, predicated) |
|                "^SUBR_ZP[mZ]Z_[BHSD]",   // SUBR (vectors) |
|                "^SUB_ZI_[BHSD]",         // SUB (immediate) |
|                "^SUB_ZZZ_[BHSD]",        // SUB (vectors, unpredicated) |
|                "^SUBR_ZI_[BHSD]",        // SUBR (immediate) |
|                "^RSUBHNB_ZZZ_[BHS]",     // RSUBHNB |
|                "^RSUBHNT_ZZZ_[BHS]",     // RSUBHNT |
|                "^SUBHNB_ZZZ_[BHS]",      // SUBHNB |
|                "^SUBHNT_ZZZ_[BHS]",      // SUBHNT |
|                "^TBL_ZZZZ?_[BHSD]",      // TBL |
|                "^TBX_ZZZ_[BHSD]",        // TBX |
|                "^TRN[12]_ZZZ_[BHSD]",    // TRN1, TRN2 (vectors) |
|                "^UZP[12]_ZZZ_[BHSD]",    // UZP1, UZP2 (vectors) |
|                "^ZIP[12]_ZZZ_[BHSD]")>;  // ZIP1, ZIP2 (vectors) |
| // Absolute difference, widening add, halving add/sub and min/max forms; |
| // all map to OlympusWrite_2c_1V. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^SABD_ZP[mZ]Z_[BHSD]",    // SABD |
|                "^UABD_ZP[mZ]Z_[BHSD]",    // UABD |
|                "^SABDLB_ZZZ_[HSD]",       // SABDLB |
|                "^SABDLT_ZZZ_[HSD]",       // SABDLT |
|                "^UABDLB_ZZZ_[HSD]",       // UABDLB |
|                "^UABDLT_ZZZ_[HSD]",       // UABDLT |
|                "^SADDLB_ZZZ_[HSD]",       // SADDLB |
|                "^SADDLBT_ZZZ_[HSD]",      // SADDLBT |
|                "^SADDLT_ZZZ_[HSD]",       // SADDLT |
|                "^SADDWB_ZZZ_[HSD]",       // SADDWB |
|                "^SADDWT_ZZZ_[HSD]",       // SADDWT |
|                "^UADDLB_ZZZ_[HSD]",       // UADDLB |
|                "^UADDLT_ZZZ_[HSD]",       // UADDLT |
|                "^UADDWB_ZZZ_[HSD]",       // UADDWB |
|                "^UADDWT_ZZZ_[HSD]",       // UADDWT |
|                "^SHADD_ZPmZ_[BHSD]",      // SHADD |
|                "^SRHADD_ZPmZ_[BHSD]",     // SRHADD |
|                "^UHADD_ZPmZ_[BHSD]",      // UHADD |
|                "^URHADD_ZPmZ_[BHSD]",     // URHADD |
|                "^SHSUB_ZP[mZ]Z_[BHSD]",   // SHSUB |
|                "^SHSUBR_ZPmZ_[BHSD]",     // SHSUBR |
|                "^UHSUB_ZP[mZ]Z_[BHSD]",   // UHSUB |
|                "^UHSUBR_ZPmZ_[BHSD]",     // UHSUBR |
|                "^SMAX_ZP[mZ]Z_[BHSD]",    // SMAX (vectors) |
|                "^SMAXP_ZPmZ_[BHSD]",      // SMAXP |
|                "^UMAX_ZP[mZ]Z_[BHSD]",    // UMAX (vectors) |
|                "^UMAXP_ZPmZ_[BHSD]",      // UMAXP |
|                "^SMAX_ZI_[BHSD]",         // SMAX (immediate) |
|                "^UMAX_ZI_[BHSD]",         // UMAX (immediate) |
|                "^SMIN_ZP[mZ]Z_[BHSD]",    // SMIN (vectors) |
|                "^SMINP_ZPmZ_[BHSD]",      // SMINP |
|                "^UMIN_ZP[mZ]Z_[BHSD]",    // UMIN (vectors) |
|                "^UMINP_ZPmZ_[BHSD]",      // UMINP |
|                "^SMIN_ZI_[BHSD]",         // SMIN (immediate) |
|                "^UMIN_ZI_[BHSD]")>;       // UMIN (immediate) |
| // Saturating add/sub/abs/neg and saturating vector inc/dec forms; all map |
| // to OlympusWrite_2c_1V. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                "^SQADD_ZPmZ_[BHSD]",     // SQADD (vectors, predicated) |
|                "^UQADD_ZPmZ_[BHSD]",     // UQADD (vectors, predicated) |
|                "^USQADD_ZPmZ_[BHSD]",    // USQADD |
|                "^SQABS_ZPmZ_[BHSD]",     // SQABS |
|                "^SQADD_ZI_[BHSD]",       // SQADD (immediate) |
|                "^SQADD_ZZZ_[BHSD]",      // SQADD (vectors, unpredicated) |
|                "^SQCADD_ZZI_[BHSD]",     // SQCADD |
|                "^UQADD_ZI_[BHSD]",       // UQADD (immediate) |
|                "^UQADD_ZZZ_[BHSD]",      // UQADD (vectors, unpredicated) |
|                "^SQNEG_ZPmZ_[BHSD]",     // SQNEG |
|                "^SQSUB_ZPmZ_[BHSD]",     // SQSUB (vectors, predicated) |
|                "^SQSUBR_ZPmZ_[BHSD]",    // SQSUBR |
|                "^UQSUB_ZPmZ_[BHSD]",     // UQSUB (vectors, predicated) |
|                "^UQSUBR_ZPmZ_[BHSD]",    // UQSUBR |
|                "^SQSUB_ZI_[BHSD]",       // SQSUB (immediate) |
|                "^SQSUB_ZZZ_[BHSD]",      // SQSUB (vectors, unpredicated) |
|                "^UQSUB_ZI_[BHSD]",       // UQSUB (immediate) |
|                "^UQSUB_ZZZ_[BHSD]")>;    // UQSUB (vectors, unpredicated) |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs |
|                // SQDECD, SQDECH, SQDECW (vector) |
|                SQDECD_ZPiI, SQDECH_ZPiI, SQDECW_ZPiI, |
|                // UQDECD, UQDECH, UQDECW (vector) |
|                UQDECD_ZPiI, UQDECH_ZPiI, UQDECW_ZPiI, |
|                // SQINCD, SQINCH, SQINCW (vector) |
|                SQINCD_ZPiI, SQINCH_ZPiI, SQINCW_ZPiI, |
|                // UQINCD, UQINCH, UQINCW (vector) |
|                UQINCD_ZPiI, UQINCH_ZPiI, UQINCW_ZPiI)>; |
| // SSHLLB, SSHLLT, USHLLB, USHLLT |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex "^SSHLLB_ZZI_[HSD]", |
|                         "^SSHLLT_ZZI_[HSD]", |
|                         "^USHLLB_ZZI_[HSD]", |
|                         "^USHLLT_ZZI_[HSD]")>; |
| // Subtract long/wide forms: SSUBL{B,BT,T,TB}, SSUBW{B,T}, USUBL{B,T}, |
| // USUBW{B,T}. All share the same write. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                 // SSUBLB, SSUBLBT, SSUBLT, SSUBLTB |
|                 "^SSUBLB_ZZZ_[HSD]", |
|                 "^SSUBLBT_ZZZ_[HSD]", |
|                 "^SSUBLT_ZZZ_[HSD]", |
|                 "^SSUBLTB_ZZZ_[HSD]", |
|                 // SSUBWB, SSUBWT |
|                 "^SSUBWB_ZZZ_[HSD]", |
|                 "^SSUBWT_ZZZ_[HSD]", |
|                 // USUBLB, USUBLT |
|                 "^USUBLB_ZZZ_[HSD]", |
|                 "^USUBLT_ZZZ_[HSD]", |
|                 // USUBWB, USUBWT |
|                 "^USUBWB_ZZZ_[HSD]", |
|                 "^USUBWT_ZZZ_[HSD]")>; |
| // Unpack (SUNPK*/UUNPK*), SUQADD and the sign/zero extend forms |
| // (SXT{B,H,W}, UXT{B,H,W}). All share the same write. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex |
|                 // SUNPKHI, SUNPKLO |
|                 "^SUNPK(HI|LO)_ZZ_[HSD]", |
|                 // UUNPKHI, UUNPKLO |
|                 "^UUNPK(HI|LO)_ZZ_[HSD]", |
|                 // SUQADD |
|                 "^SUQADD_ZPmZ_[BHSD]", |
|                 // SXTB, SXTH, SXTW |
|                 "^SXTB_ZPmZ_[HSD]", |
|                 "^SXTH_ZPmZ_[SD]", |
|                 "^SXTW_ZPmZ_D", |
|                 // UXTB, UXTH, UXTW |
|                 "^UXTB_ZPmZ_[HSD]", |
|                 "^UXTH_ZPmZ_[SD]", |
|                 "^UXTW_ZPmZ_D")>; |
| |
| // Shift, complex |
| // ASRD, RSHRN{B,T} and the saturating/rounding shift-left family. The |
| // "ZP[mZ]" patterns cover both the ZPm and ZPZ spellings of a name. |
| def : InstRW<[OlympusWrite_4c_1V], |
|              (instregex |
|                 // ASRD |
|                 "^ASRD_ZP[mZ]I_[BHSD]", |
|                 // RSHRNB, RSHRNT |
|                 "^RSHRNB_ZZI_[BHS]", |
|                 "^RSHRNT_ZZI_[BHS]", |
|                 // SQRSHLR, SQRSHL |
|                 "^SQRSHLR_ZPmZ_[BHSD]", |
|                 "^SQRSHL_ZP[mZ]Z_[BHSD]", |
|                 // SQSHL (immediate), SQSHL (vectors), SQSHLR, SQSHLU |
|                 "^SQSHL_ZP[mZ]I_[BHSD]", |
|                 "^SQSHL_ZP[mZ]Z_[BHSD]", |
|                 "^SQSHLR_ZPmZ_[BHSD]", |
|                 "^SQSHLU_ZP[mZ]I_[BHSD]", |
|                 // SRSHL, SRSHLR |
|                 "^SRSHL_ZP[mZ]Z_[BHSD]", |
|                 "^SRSHLR_ZPmZ_[BHSD]", |
|                 // UQRSHLR, UQRSHL |
|                 "^UQRSHLR_ZPmZ_[BHSD]", |
|                 "^UQRSHL_ZP[mZ]Z_[BHSD]", |
|                 // UQSHL (immediate), UQSHL (vectors), UQSHLR |
|                 "^UQSHL_ZP[mZ]I_[BHSD]", |
|                 "^UQSHL_ZP[mZ]Z_[BHSD]", |
|                 "^UQSHLR_ZPmZ_[BHSD]", |
|                 // URSHL, URSHLR |
|                 "^URSHL_ZP[mZ]Z_[BHSD]", |
|                 "^URSHLR_ZPmZ_[BHSD]")>; |
| // Saturating narrowing shift-right forms (bottom/top) plus SRSHR/URSHR. |
| // All share the same write. |
| def : InstRW<[OlympusWrite_4c_1V], |
|              (instregex |
|                 // SQRSHRNB, SQRSHRNT |
|                 "^SQRSHRNB_ZZI_[BHS]", |
|                 "^SQRSHRNT_ZZI_[BHS]", |
|                 // SQRSHRUNB, SQRSHRUNT |
|                 "^SQRSHRUNB_ZZI_[BHS]", |
|                 "^SQRSHRUNT_ZZI_[BHS]", |
|                 // SQSHRNB, SQSHRNT |
|                 "^SQSHRNB_ZZI_[BHS]", |
|                 "^SQSHRNT_ZZI_[BHS]", |
|                 // SQSHRUNB, SQSHRUNT |
|                 "^SQSHRUNB_ZZI_[BHS]", |
|                 "^SQSHRUNT_ZZI_[BHS]", |
|                 // UQRSHRNB, UQRSHRNT |
|                 "^UQRSHRNB_ZZI_[BHS]", |
|                 "^UQRSHRNT_ZZI_[BHS]", |
|                 // UQSHRNB, UQSHRNT |
|                 "^UQSHRNB_ZZI_[BHS]", |
|                 "^UQSHRNT_ZZI_[BHS]", |
|                 // SRSHR, URSHR |
|                 "^SRSHR_ZP[mZ]I_[BHSD]", |
|                 "^URSHR_ZP[mZ]I_[BHSD]")>; |
| // SQXTN{B,T}, SQXTUN{B,T}, UQXTN{B,T} |
| def : InstRW<[OlympusWrite_4c_1V], |
|              (instregex "^SQXTNB_ZZ_[BHS]", |
|                         "^SQXTNT_ZZ_[BHS]", |
|                         "^SQXTUNB_ZZ_[BHS]", |
|                         "^SQXTUNT_ZZ_[BHS]", |
|                         "^UQXTNB_ZZ_[BHS]", |
|                         "^UQXTNT_ZZ_[BHS]")>; |
| |
| // Shift and accumulate |
| // OlympusWr_ZSA is the result write; OlympusRd_ZSA is a read advance of 2 |
| // cycles that only applies when the value read was produced by OlympusWr_ZSA. |
| def OlympusWr_ZSA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 4; |
| } |
| def OlympusRd_ZSA : SchedReadAdvance<2, [OlympusWr_ZSA]>; |
| // SRSRA, SSRA, URSRA, USRA |
| def : InstRW<[OlympusWr_ZSA, OlympusRd_ZSA], |
|              (instregex "^SRSRA_ZZI_[BHSD]", |
|                         "^SSRA_ZZI_[BHSD]", |
|                         "^URSRA_ZZI_[BHSD]", |
|                         "^USRA_ZZI_[BHSD]")>; |
| |
| // Multiply, B/H/S |
| // Predicated, immediate, unpredicated and indexed multiplies whose patterns |
| // select the B, H or S element sizes. All share the same write. |
| def : InstRW<[OlympusWrite_4c_1V0123], |
|              (instregex |
|                 // MUL (vectors, predicated), SMULH/UMULH (predicated) |
|                 "^MUL_ZP[mZ]Z_[BHS]", |
|                 "^SMULH_ZP[mZ]Z_[BHS]", |
|                 "^UMULH_ZP[mZ]Z_[BHS]", |
|                 // MUL (immediate), MUL (vectors, unpredicated) |
|                 "^MUL_ZI_[BHS]", |
|                 "^MUL_ZZZ_[BHS]", |
|                 // SMULH (unpredicated) |
|                 "^SMULH_ZZZ_[BHS]", |
|                 // SQDMULH (vectors), SQRDMULH (vectors) |
|                 "^SQDMULH_ZZZ_[BHS]", |
|                 "^SQRDMULH_ZZZ_[BHS]", |
|                 // UMULH (unpredicated) |
|                 "^UMULH_ZZZ_[BHS]", |
|                 // SQDMULH (indexed), SQRDMULH (indexed), MUL (indexed) |
|                 "^SQDMULH_ZZZI_[HS]", |
|                 "^SQRDMULH_ZZZI_[HS]", |
|                 "^MUL_ZZZI_[HS]")>; |
| |
| // Multiply, D |
| // Pattern-matched forms: MUL/SMULH/UMULH (predicated) and MUL (immediate). |
| def : InstRW<[OlympusWrite_5c_1V0123_2], |
|              (instregex "^MUL_ZP[mZ]Z_D", |
|                         "^SMULH_ZP[mZ]Z_D", |
|                         "^UMULH_ZP[mZ]Z_D", |
|                         "^MUL_ZI_D")>; |
| // Exactly-named forms: unpredicated MUL/SMULH/UMULH, SQDMULH/SQRDMULH |
| // (vectors and indexed) and MUL (indexed). |
| def : InstRW<[OlympusWrite_5c_1V0123_2], |
|              (instrs MUL_ZZZ_D, SMULH_ZZZ_D, UMULH_ZZZ_D, |
|                      SQDMULH_ZZZ_D, SQRDMULH_ZZZ_D, |
|                      SQDMULH_ZZZI_D, SQRDMULH_ZZZI_D, |
|                      MUL_ZZZI_D)>; |
| |
| // Multiply long |
| // Each mnemonic appears twice: the vectors form (_ZZZ_) and the indexed |
| // form (_ZZZI_). All share the same write. |
| def : InstRW<[OlympusWrite_4c_1V0123], |
|              (instregex |
|                 // SMULLB, SMULLT |
|                 "^SMULLB_ZZZ_[HSD]", |
|                 "^SMULLB_ZZZI_[SD]", |
|                 "^SMULLT_ZZZ_[HSD]", |
|                 "^SMULLT_ZZZI_[SD]", |
|                 // SQDMULLB, SQDMULLT |
|                 "^SQDMULLB_ZZZ_[HSD]", |
|                 "^SQDMULLB_ZZZI_[SD]", |
|                 "^SQDMULLT_ZZZ_[HSD]", |
|                 "^SQDMULLT_ZZZI_[SD]", |
|                 // UMULLB, UMULLT |
|                 "^UMULLB_ZZZ_[HSD]", |
|                 "^UMULLB_ZZZI_[SD]", |
|                 "^UMULLT_ZZZ_[HSD]", |
|                 "^UMULLT_ZZZI_[SD]")>; |
| |
| // Multiply accumulate, B/H/S |
| // Plain multiply-accumulate write and its read advance. |
| def OlympusWr_ZMA_BHS : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 4; |
| } |
| def OlympusRd_ZMA_BHS : SchedReadAdvance<2, [OlympusWr_ZMA_BHS]>; |
| // Saturating multiply-accumulate writes. OlympusRd_ZMASQ lists all three of |
| // them (B/H/S, D and long) as the writes its 2-cycle advance applies to. |
| def OlympusWr_ZMASQ_BHS : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 4; |
| } |
| def OlympusWr_ZMASQ_D : SchedWriteRes<[OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 5; |
| } |
| def OlympusWr_ZMASQL : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 4; |
| } |
| def OlympusRd_ZMASQ : SchedReadAdvance<2, [OlympusWr_ZMASQL, |
|                                            OlympusWr_ZMASQ_BHS, |
|                                            OlympusWr_ZMASQ_D]>; |
| // CMLA (vectors), CMLA (indexed), MLA (indexed), MLS (indexed) |
| def : InstRW<[OlympusWr_ZMA_BHS, OlympusRd_ZMA_BHS], |
|              (instregex "^CMLA_ZZZ_[BHS]", |
|                         "^CMLA_ZZZI_[HS]", |
|                         "^MLA_ZZZI_[HS]", |
|                         "^MLS_ZZZI_[HS]")>; |
| // MLA (vectors), MLS (vectors). ReadDefault occupies the first read slot so |
| // that the read advance lands on the second one. |
| def : InstRW<[OlympusWr_ZMA_BHS, ReadDefault, OlympusRd_ZMA_BHS], |
|              (instregex "^MLA_ZP[mZ]ZZ_[BHS]", |
|                         "^MLS_ZP[mZ]ZZ_[BHS]")>; |
| // SQRDCMLAH, SQRDMLAH, SQRDMLSH (vectors and indexed) |
| def : InstRW<[OlympusWr_ZMASQ_BHS, OlympusRd_ZMASQ], |
|              (instregex "^SQRDCMLAH_ZZZ_[BHS]", |
|                         "^SQRDMLAH_ZZZ_[BHS]", |
|                         "^SQRDMLSH_ZZZ_[BHS]", |
|                         "^SQRDCMLAH_ZZZI_[HS]", |
|                         "^SQRDMLAH_ZZZI_[HS]", |
|                         "^SQRDMLSH_ZZZI_[HS]")>; |
| |
| // Multiply accumulate, D |
| // The D-sized write lists OlympusUnitV0123 twice and has a latency of 5. |
| def OlympusWr_ZMA_D : SchedWriteRes<[OlympusUnitV0123, OlympusUnitV0123]> { |
|   let Latency = 5; |
| } |
| def OlympusRd_ZMA_D : SchedReadAdvance<2, [OlympusWr_ZMA_D]>; |
| // CMLA (vectors), MLA (indexed), MLS (indexed) |
| def : InstRW<[OlympusWr_ZMA_D, OlympusRd_ZMA_D], |
|              (instrs CMLA_ZZZ_D, MLA_ZZZI_D, MLS_ZZZI_D)>; |
| // MLA (vectors), MLS (vectors). ReadDefault occupies the first read slot so |
| // that the read advance lands on the second one. |
| def : InstRW<[OlympusWr_ZMA_D, ReadDefault, OlympusRd_ZMA_D], |
|              (instregex "^MLA_ZP[mZ]ZZ_D", |
|                         "^MLS_ZP[mZ]ZZ_D")>; |
| // SQRDCMLAH (vectors), SQRDMLAH/SQRDMLSH (vectors and indexed) |
| def : InstRW<[OlympusWr_ZMASQ_D, OlympusRd_ZMASQ], |
|              (instrs SQRDCMLAH_ZZZ_D, |
|                      SQRDMLAH_ZZZ_D, SQRDMLSH_ZZZ_D, |
|                      SQRDMLAH_ZZZI_D, SQRDMLSH_ZZZI_D)>; |
| |
| // Multiply accumulate long |
| def OlympusWr_ZMAL : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 4; |
| } |
| def OlympusRd_ZMAL : SchedReadAdvance<2, [OlympusWr_ZMAL]>; |
| // Non-saturating forms; each mnemonic has a vectors (_ZZZ_) and an indexed |
| // (_ZZZI_) pattern. |
| def : InstRW<[OlympusWr_ZMAL, OlympusRd_ZMAL], |
|              (instregex |
|                 // SMLALB, SMLALT |
|                 "^SMLALB_ZZZ_[HSD]", |
|                 "^SMLALB_ZZZI_[SD]", |
|                 "^SMLALT_ZZZ_[HSD]", |
|                 "^SMLALT_ZZZI_[SD]", |
|                 // SMLSLB, SMLSLT |
|                 "^SMLSLB_ZZZ_[HSD]", |
|                 "^SMLSLB_ZZZI_[SD]", |
|                 "^SMLSLT_ZZZ_[HSD]", |
|                 "^SMLSLT_ZZZI_[SD]", |
|                 // UMLALB, UMLALT |
|                 "^UMLALB_ZZZ_[HSD]", |
|                 "^UMLALB_ZZZI_[SD]", |
|                 "^UMLALT_ZZZ_[HSD]", |
|                 "^UMLALT_ZZZI_[SD]", |
|                 // UMLSLB, UMLSLT |
|                 "^UMLSLB_ZZZ_[HSD]", |
|                 "^UMLSLB_ZZZI_[SD]", |
|                 "^UMLSLT_ZZZ_[HSD]", |
|                 "^UMLSLT_ZZZI_[SD]")>; |
| // Saturating doubling forms; SQDMLALBT and SQDMLSLBT have a vectors pattern |
| // only. |
| def : InstRW<[OlympusWr_ZMASQL, OlympusRd_ZMASQ], |
|              (instregex |
|                 // SQDMLALB, SQDMLALBT, SQDMLALT |
|                 "^SQDMLALB_ZZZ_[HSD]", |
|                 "^SQDMLALB_ZZZI_[SD]", |
|                 "^SQDMLALBT_ZZZ_[HSD]", |
|                 "^SQDMLALT_ZZZ_[HSD]", |
|                 "^SQDMLALT_ZZZI_[SD]", |
|                 // SQDMLSLB, SQDMLSLBT, SQDMLSLT |
|                 "^SQDMLSLB_ZZZ_[HSD]", |
|                 "^SQDMLSLB_ZZZI_[SD]", |
|                 "^SQDMLSLBT_ZZZ_[HSD]", |
|                 "^SQDMLSLT_ZZZ_[HSD]", |
|                 "^SQDMLSLT_ZZZI_[SD]")>; |
| |
| // Multiply add/sub |
| def OlympusWr_ZMAD : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 5; |
| } |
| def OlympusRd_ZMAD : SchedReadAdvance<2, [OlympusWr_ZMAD]>; |
| // MAD, MSB. ReadDefault occupies the first read slot so that the read |
| // advance lands on the second one. |
| def : InstRW<[OlympusWr_ZMAD, ReadDefault, OlympusRd_ZMAD], |
|              (instregex "^MAD_ZPmZZ_[BHSD]", |
|                         "^MSB_ZPmZZ_[BHSD]")>; |
| |
| // Other arithmetic accumulate |
| def OlympusWr_ZA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 4; |
| } |
| def OlympusRd_ZA : SchedReadAdvance<2, [OlympusWr_ZA]>; |
| // SABA, SABALB, SABALT, UABA, UABALB, UABALT |
| def : InstRW<[OlympusWr_ZA, OlympusRd_ZA], |
|              (instregex "^SABA_ZZZ_[BHSD]", |
|                         "^SABALB_ZZZ_[HSD]", |
|                         "^SABALT_ZZZ_[HSD]", |
|                         "^UABA_ZZZ_[BHSD]", |
|                         "^UABALB_ZZZ_[HSD]", |
|                         "^UABALT_ZZZ_[HSD]")>; |
| // SADALP, UADALP. ReadDefault occupies the first read slot so that the read |
| // advance lands on the second one. |
| def : InstRW<[OlympusWr_ZA, ReadDefault, OlympusRd_ZA], |
|              (instregex "^SADALP_ZPmZ_[HSD]", |
|                         "^UADALP_ZPmZ_[HSD]")>; |
| |
| // Dot product, matrix multiply, 8 bit |
| // Dot-product and matrix-multiply-accumulate each get their own write and a |
| // 1-cycle read advance that applies only to that same write. |
| def OlympusWr_ZDOT_B : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 3; |
| } |
| def OlympusRd_ZDOT_B : SchedReadAdvance<1, [OlympusWr_ZDOT_B]>; |
| def OlympusWr_ZMMA : SchedWriteRes<[OlympusUnitV]> { |
|   let Latency = 3; |
| } |
| def OlympusRd_ZMMA : SchedReadAdvance<1, [OlympusWr_ZMMA]>; |
| // CDOT (vectors, indexed); SDOT/UDOT (4-way, vectors and indexed); |
| // USDOT (vectors, indexed); SUDOT |
| def : InstRW<[OlympusWr_ZDOT_B, OlympusRd_ZDOT_B], |
|              (instrs CDOT_ZZZ_S, CDOT_ZZZI_S, |
|                      SDOT_ZZZ_BtoS, UDOT_ZZZ_BtoS, |
|                      SDOT_ZZZI_BtoS, UDOT_ZZZI_BtoS, |
|                      USDOT_ZZZ, SUDOT_ZZZI, USDOT_ZZZI)>; |
| // SMMLA, UMMLA, USMMLA |
| def : InstRW<[OlympusWr_ZMMA, OlympusRd_ZMMA], |
|              (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; |
| |
| // Dot product, 16 bit |
| def OlympusWr_ZDOT_H : SchedWriteRes<[OlympusUnitV0123]> { |
|   let Latency = 3; |
| } |
| def OlympusRd_ZDOT_H : SchedReadAdvance<1, [OlympusWr_ZDOT_H]>; |
| // CDOT (vectors, indexed); SDOT/UDOT (4-way, vectors and indexed) |
| def : InstRW<[OlympusWr_ZDOT_H, OlympusRd_ZDOT_H], |
|              (instrs CDOT_ZZZ_D, CDOT_ZZZI_D, |
|                      SDOT_ZZZ_HtoD, UDOT_ZZZ_HtoD, |
|                      SDOT_ZZZI_HtoD, UDOT_ZZZI_HtoD)>; |
| |
| // Bit manipulation |
| // BDEP, BEXT, BGRP |
| def : InstRW<[OlympusWrite_6c_2V12], |
|              (instregex "^BDEP_ZZZ_[BHSD]", |
|                         "^BEXT_ZZZ_[BHSD]", |
|                         "^BGRP_ZZZ_[BHSD]")>; |
| |
| // Compare and set flags |
| // CMP<cc> in its immediate, wide-element and vectors forms. The three |
| // patterns differ only in the operand suffix that follows the condition. |
| def : InstRW<[OlympusWrite_2c_1V03_or_1M_1V03], |
|              (instregex |
|                 // CMP<cc> (immediate) |
|                 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZI_[BHSD]", |
|                 // CMP<cc> (wide elements) |
|                 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]", |
|                 // CMP<cc> (vectors) |
|                 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZZ_[BHSD]")>; |
| |
| // Extract, FP scalar and vector |
| def : InstRW<[OlympusWrite_3c_1V12], |
|              (instregex |
|                 // CLASTA (SIMD&FP scalar), CLASTA (vectors) |
|                 "^CLASTA_VPZ_[BHSD]", |
|                 "^CLASTA_ZPZ_[BHSD]", |
|                 // CLASTB (SIMD&FP scalar), CLASTB (vectors) |
|                 "^CLASTB_VPZ_[BHSD]", |
|                 "^CLASTB_ZPZ_[BHSD]", |
|                 // LASTA (SIMD&FP scalar), LASTB (SIMD&FP scalar) |
|                 "^LASTA_VPZ_[BHSD]", |
|                 "^LASTB_VPZ_[BHSD]", |
|                 // COMPACT |
|                 "^COMPACT_ZPZ_[SD]", |
|                 // SPLICE |
|                 "^SPLICE_ZPZZ?_[BHSD]")>; |
| |
| // Extract, gen scalar conditional |
| def : InstRW<[OlympusWrite_8c_1M_1V03_1V12], |
|              (instregex "^CLAST[AB]_RPZ_[BHSD]")>; |
| |
| // Extract, gen scalar unconditional |
| def : InstRW<[OlympusWrite_6c_1V03_1V12], |
|              (instregex "^LAST[AB]_RPZ_[BHSD]")>; |
| |
| // Convert int to FP. The three records differ by source/destination size, |
| // which is encoded in the "<src>to<dst>" suffix of the instruction name. |
| |
| // Convert int to FP, 64b or to F64 |
| def : InstRW<[OlympusWrite_3c_1V0123], |
|              (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", |
|                         "^[SU]CVTF_ZPmZ_StoD")>; |
| |
| // Convert int to FP, 32b to F16/F32 |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; |
| |
| // Convert int to FP, 16b |
| def : InstRW<[OlympusWrite_6c_1V0123_4], |
|              (instregex "^[SU]CVTF_ZPmZ_HtoH")>; |
| |
| // Copy/Insert, from gen |
| // CPY (scalar), INSR (scalar) |
| def : InstRW<[OlympusWrite_5c_1M_1V], |
|              (instregex "^CPY_ZPmR_[BHSD]", |
|                         "^INSR_ZR_[BHSD]")>; |
| |
| // Duplicate, from gen |
| def : InstRW<[OlympusWrite_3c_1M], |
|              (instregex "^DUP_ZR_[BHSD]")>; |
| |
| // Divide |
| // SDIV/UDIV (ZPmZ and ZPZZ spellings) and SDIVR/UDIVR, split by element size. |
| // Divides, 32 bit |
| def : InstRW<[OlympusWrite_12c_1V45], |
|              (instregex "^[SU]DIV_ZP[mZ]Z_S", |
|                         "^[SU]DIVR_ZPmZ_S")>; |
| // Divides, 64 bit |
| def : InstRW<[OlympusWrite_20c_1V45], |
|              (instregex "^[SU]DIV_ZP[mZ]Z_D", |
|                         "^[SU]DIVR_ZPmZ_D")>; |
| |
| // INDEX is split two ways: immediate-only (II) versus forms with at least one |
| // scalar register operand (IR, RI, RR), and B/H/S versus D element size. |
| |
| // Index, immediates, B/H/S |
| def : InstRW<[OlympusWrite_4c_1V0123], |
|              (instregex "^INDEX_II_[BHS]")>; |
| |
| // Index, immediates, D |
| def : InstRW<[OlympusWrite_5c_1V0123_2], |
|              (instrs INDEX_II_D)>; |
| |
| // Index, scalar, B/H/S |
| def : InstRW<[OlympusWrite_7c_1M_1V0123], |
|              (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; |
| |
| // Index, scalar, D |
| def : InstRW<[OlympusWrite_8c_1M_1V0123_2], |
|              (instregex "^INDEX_(IR|RI|RR)_D")>; |
| |
| // Matching operations |
| // MATCH, NMATCH |
| def : InstRW<[OlympusWrite_2c_1V03_or_1M_1V03], |
|              (instregex "^N?MATCH_PPzZZ_[BH]")>; |
| |
| // Reciprocal estimate |
| // URECPE, URSQRTE |
| def : InstRW<[OlympusWrite_4c_1V0123_2], |
|              (instregex "^URECPE_ZPmZ_S", |
|                         "^URSQRTE_ZPmZ_S")>; |
| |
| // Reductions. One record per (operation, element size) category; several |
| // categories happen to reuse the same write but are kept separate. |
| |
| // Reduction, logical |
| def : InstRW<[OlympusWrite_6c_1V_1V0123], |
|              (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>; |
| |
| // Reduction, add, B form |
| def : InstRW<[OlympusWrite_11c_2V_2V0123], |
|              (instregex "^[SU]ADDV_VPZ_B")>; |
| |
| // Reduction, add, H form |
| def : InstRW<[OlympusWrite_9c_1V_2V0123], |
|              (instregex "^[SU]ADDV_VPZ_H")>; |
| |
| // Reduction, add, S form |
| def : InstRW<[OlympusWrite_8c_2V_1V0123], |
|              (instregex "^[SU]ADDV_VPZ_S")>; |
| |
| // Reduction, min/max, B form |
| def : InstRW<[OlympusWrite_9c_1V_2V0123], |
|              (instregex "^[SU](MAX|MIN)V_VPZ_B")>; |
| |
| // Reduction, min/max, H form |
| def : InstRW<[OlympusWrite_8c_2V_1V0123], |
|              (instregex "^[SU](MAX|MIN)V_VPZ_H")>; |
| |
| // Reduction, min/max, S form |
| def : InstRW<[OlympusWrite_6c_1V_1V0123], |
|              (instregex "^[SU](MAX|MIN)V_VPZ_S")>; |
| |
| // Reduction, D form |
| // Only UADDV is listed for the add reduction here. |
| def : InstRW<[OlympusWrite_5c_2V], |
|              (instregex "^[SU](MAX|MIN)V_VPZ_D", |
|                         "^UADDV_VPZ_D")>; |
| |
| // 3.23 SVE cryptography instructions |
| // ----------------------------------------------------------------------------- |
| |
| // AES, SHA3 operations |
| // Pattern-matched forms: AESD, AESE, AESIMC, AESMC and XAR. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instregex "^AES[DE]_ZZZ_B$", |
|                         "^AESI?MC_ZZ_B$", |
|                         "^XAR_ZZZI_[BHSD]")>; |
| // Exactly-named forms: BCAX, EOR3, RAX1. |
| def : InstRW<[OlympusWrite_2c_1V], |
|              (instrs BCAX_ZZZZ, EOR3_ZZZZ, RAX1_ZZZ_D)>; |
| |
| // SM4 operations |
| // SM4E, SM4EKEY |
| def : InstRW<[OlympusWrite_4c_1V0], |
|              (instregex "^SM4E(KEY)?_ZZZ_S$")>; |
| |
| // 3.24 SVE load instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Load vector |
| def : InstRW<[OlympusWrite_6c_1L], |
|              (instrs LDR_ZXI)>; |
| |
| // Load predicate |
| def : InstRW<[OlympusWrite_5c_1M_1L], |
|              (instrs LDR_PXI)>; |
| |
| // Contiguous load single structure |
| // All single-register contiguous, broadcast, non-faulting, non-temporal and |
| // first-faulting loads share one write. An optional "(_IMM)?" covers both the |
| // scalar-plus-immediate and scalar-plus-scalar spellings of a name. |
| def : InstRW<[OlympusWrite_6c_1L], |
|              (instregex |
|                 // LD1* / LD1S* (scalar plus immediate, scalar plus scalar) |
|                 "^LD1[BHWD](_IMM)?$", |
|                 "^LD1S?B_[HSD](_IMM)?$", |
|                 "^LD1S?H_[SD](_IMM)?$", |
|                 "^LD1S?W_D(_IMM)?$", |
|                 // LD1R*, LD1RS* |
|                 "^LD1R[BHWD]_IMM$", |
|                 "^LD1RS?B_[HSD]_IMM$", |
|                 "^LD1RS?H_[SD]_IMM$", |
|                 "^LD1RW_D_IMM$", |
|                 "^LD1RSW_IMM$", |
|                 // LD1RQ* (scalar plus immediate, scalar plus scalar) |
|                 "^LD1RQ_[BHWD](_IMM)?$", |
|                 // LDNF1*, LDNF1S* |
|                 "^LDNF1[BHWD]_IMM$", |
|                 "^LDNF1S?B_[HSD]_IMM$", |
|                 "^LDNF1S?H_[SD]_IMM$", |
|                 "^LDNF1S?W_D_IMM$", |
|                 // LDNT1* (scalar plus immediate / scalar, single register) |
|                 "^LDNT1[BHWD]_ZR[IR]$", |
|                 // LDFF1*, LDFF1S* (scalar plus scalar) |
|                 "^LDFF1[BHWD]$", |
|                 "^LDFF1S?B_[HSD]$", |
|                 "^LDFF1S?H_[SD]$", |
|                 "^LDFF1S?W_D$")>; |
| |
| // Contiguous multi-structure loads. For each of LD2/LD3/LD4 the "_IMM" name |
| // is the scalar + imm form and the bare name is the scalar + scalar form; the |
| // scalar + scalar writes carry an extra "1I" component in their names. |
| |
| // Contiguous load two structures, scalar + imm |
| def : InstRW<[OlympusWrite_8c_2L_2V], |
|              (instregex "^LD2[BHWD]_IMM$")>; |
| |
| // Contiguous load two structures, scalar + scalar |
| def : InstRW<[OlympusWrite_9c_1I_2L_2V], |
|              (instregex "^LD2[BHWD]$")>; |
| |
| // Contiguous load three structures, scalar + imm |
| def : InstRW<[OlympusWrite_8c_3L_3V], |
|              (instregex "^LD3[BHWD]_IMM$")>; |
| |
| // Contiguous load three structures, scalar + scalar |
| def : InstRW<[OlympusWrite_9c_1I_3L_3V], |
|              (instregex "^LD3[BHWD]$")>; |
| |
| // Contiguous load four structures, scalar + imm |
| def : InstRW<[OlympusWrite_9c_4L_8V], |
|              (instregex "^LD4[BHWD]_IMM$")>; |
| |
| // Contiguous load four structures, scalar + scalar |
| def : InstRW<[OlympusWrite_10c_1I_4L_8V], |
|              (instregex "^LD4[BHWD]$")>; |
| |
| // Gather loads, grouped by the write they use. |
| |
| // 32-bit element gathers other than the H form with 32-bit scaled offset. |
| def : InstRW<[OlympusWrite_9c_4L_1V03], |
|              (instregex |
|                 // Gather load, vector + imm, 32-bit element |
|                 "^GLD(FF)?1S?[BH]_S_IMM$", |
|                 "^GLD(FF)?1W_IMM$", |
|                 // Gather load, scalar + vector, other 32-bit element |
|                 "^GLD(FF)?1W_[SU]XTW(_SCALED)?$", |
|                 "^GLD(FF)?1S?[BH]_S_[SU]XTW$", |
|                 // Gather non-temporal load, 32-bit element |
|                 "^LDNT1[BHW]_ZZR_S$", |
|                 "^LDNT1S[BH]_ZZR_S$")>; |
| |
| // Gather load, scalar + vector, H form 32-bit scaled offset |
| def : InstRW<[OlympusWrite_9c_4L_2V03], |
|              (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$")>; |
| |
| // 64-bit element gathers. |
| def : InstRW<[OlympusWrite_9c_2L_1V03], |
|              (instregex |
|                 // Gather load, vector + imm, 64-bit element |
|                 "^GLD(FF)?1S?[BHW]_D_IMM$", |
|                 "^GLD(FF)?1D_IMM$", |
|                 // Gather load, scalar + vector, 64-bit element |
|                 "^GLD(FF)?1S?B_D(_[SU]XTW)?$", |
|                 "^GLD(FF)?1S?[HW]_D(_[SU]XTW)?(_SCALED)?$", |
|                 "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$", |
|                 // Gather non-temporal load, 64-bit element |
|                 "^LDNT1S?[BHW]_ZZR_D$", |
|                 "^LDNT1D_ZZR_D$")>; |
| |
| // 3.25 SVE store instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Store from predicate |
| def : InstRW<[OlympusWrite_1c_1SA_1D], |
|              (instrs STR_PXI)>; |
| |
| // Store from vector |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], |
|              (instrs STR_ZXI)>; |
| |
| // Contiguous store single structure |
| // An optional "(_IMM)?" covers both the scalar-plus-immediate and the |
| // scalar-plus-scalar spelling of each ST1 name. |
| def : InstRW<[OlympusWrite_3c_1SA_1V0123], |
|              (instregex |
|                 // ST1* (scalar plus immediate / scalar, single register) |
|                 "^ST1[BHWD](_IMM)?$", |
|                 "^ST1B_[HSD](_IMM)?$", |
|                 "^ST1H_[SD](_IMM)?$", |
|                 "^ST1W_D(_IMM)?$", |
|                 // STNT1* (scalar plus immediate / scalar, single register) |
|                 "^STNT1[BHWD]_ZR[IR]$")>; |
| |
| // Contiguous multi-structure stores. The "_IMM" name is the scalar + imm form |
| // and the bare name is the scalar + scalar form. ST4 is further split into |
| // B/H/W and D because the two groups use different writes. |
| |
| // Contiguous store two structures, scalar + imm |
| def : InstRW<[OlympusWrite_5c_2SA_2V_2V0123], |
|              (instregex "^ST2[BHWD]_IMM$")>; |
| |
| // Contiguous store two structures, scalar + scalar |
| def : InstRW<[OlympusWrite_5c_1I_2SA_2V_2V0123], |
|              (instregex "^ST2[BHWD]$")>; |
| |
| // Contiguous store three structures, scalar + imm |
| def : InstRW<[OlympusWrite_6c_3SA_3V_3V0123], |
|              (instregex "^ST3[BHWD]_IMM$")>; |
| |
| // Contiguous store three structures, scalar + scalar |
| def : InstRW<[OlympusWrite_6c_1I_3SA_3V_3V0123], |
|              (instregex "^ST3[BHWD]$")>; |
| |
| // Contiguous store four structures, scalar + imm, B/H/W |
| def : InstRW<[OlympusWrite_8c_4SA_8V_4V0123], |
|              (instregex "^ST4[BHW]_IMM$")>; |
| |
| // Contiguous store four structures, scalar + imm, D |
| def : InstRW<[OlympusWrite_6c_4SA_4V_4V0123], |
|              (instrs ST4D_IMM)>; |
| |
| // Contiguous store four structures, scalar + scalar, B/H/W |
| def : InstRW<[OlympusWrite_8c_1I_4SA_8V_4V0123], |
|              (instregex "^ST4[BHW]$")>; |
| |
| // Contiguous store four structures, scalar + scalar, D |
| def : InstRW<[OlympusWrite_6c_1I_4SA_4V_4V0123], |
|              (instrs ST4D)>; |
| |
| // Scatter stores, grouped by the write they use. |
| |
| // 32-bit element scatters other than the H form with 32-bit scaled offset. |
| def : InstRW<[OlympusWrite_5c_4SA_1V03_4V0123], |
|              (instregex |
|                 // Scatter store, vector + imm, 32-bit element |
|                 "^SST1[BH]_S_IMM$", |
|                 "^SST1W_IMM$", |
|                 // Scatter store, scalar + vector, 32-bit element |
|                 "^SST1[BH]_S_[SU]XTW$", |
|                 "^SST1W_[SU]XTW(_SCALED)?$")>; |
| |
| // Scatter store, scalar + vector, H form 32-bit scaled offset |
| def : InstRW<[OlympusWrite_5c_4SA_2V03_4V0123], |
|              (instregex "^SST1H_S_[SU]XTW_SCALED$")>; |
| |
| // 64-bit element scatters. |
| def : InstRW<[OlympusWrite_4c_2SA_1V03_2V0123], |
|              (instregex |
|                 // Scatter store, vector + imm, 64-bit element |
|                 "^SST1[BHW]_D_IMM$", |
|                 "^SST1D_IMM$", |
|                 // Scatter store, scalar + vector, 64-bit element |
|                 "^SST1[BHW]_D(_[SU]XTW)?$", |
|                 "^SST1[HW]_D(_[SU]XTW)?_SCALED$", |
|                 "^SST1D(_[SU]XTW)?(_SCALED)?$")>; |
| |
| // Scatter non-temporal store, 32-bit element |
| // The pattern is terminated with '$', matching the LDNT1*_ZZR_* gather |
| // patterns and the other store patterns in this section. instregex is a |
| // prefix match, so without the anchor any longer instruction name starting |
| // with the same text would silently pick up this write as well. |
| def : InstRW<[OlympusWrite_5c_4SA_1V03_4V0123], |
|              (instregex "^STNT1[BHW]_ZZR_S$")>; |
| |
| // Scatter non-temporal store, 64-bit element |
| // Anchored with '$' for the same reason as the 32-bit element pattern. |
| def : InstRW<[OlympusWrite_4c_2SA_1V03_2V0123], |
|              (instregex "^STNT1[BHWD]_ZZR_D$")>; |
| |
| // SVE Miscellaneous instructions |
| // ----------------------------------------------------------------------------- |
| |
| // Prefetch |
| // NOTE: Not specified in the SWOG. |
| // PRFB, PRFH, PRFW, PRFD: the unanchored pattern matches every instruction |
| // whose name starts with one of these four prefixes. |
| def : InstRW<[OlympusWrite_6c_1L], |
|              (instregex "^PRF[BHWD]")>; |
| |
| } |