From bb087776cf01927ae2dadb88e268bea3536d1332 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 11 Apr 2023 17:34:01 +0100 Subject: [PATCH] add FP LD/ST to optable --- openpower/sv/rfc/ls012.mdwn | 7 ++-- openpower/sv/rfc/ls012/optable.csv | 51 +++++++++++++++++------------- 2 files changed, 33 insertions(+), 25 deletions(-) diff --git a/openpower/sv/rfc/ls012.mdwn b/openpower/sv/rfc/ls012.mdwn index 1433d43fe..c376ed275 100644 --- a/openpower/sv/rfc/ls012.mdwn +++ b/openpower/sv/rfc/ls012.mdwn @@ -46,17 +46,18 @@ or may not be Vectoriseable, but that every "Defined Word" should have merits on its own, not just when Vectorised. An example of a borderline Vectoriseable Defined Word is `mv.swizzle` which only really becomes high-priority for Audio/Video, Vector GPU and HPC Workloads, but has -less merit as a Scalar-only operation. +less merit as a Scalar-only operation, yet when SVP64Single-Prefixed +can be part of an atomic Compare-and-Swap sequence. Although one of the top world-class ISAs, Power ISA Scalar (SFFS) has not been significantly advanced in 12 years: IBM's primary focus has understandably been on PackedSIMD VSX. Unfortunately, with VSX being 914 instructions and 128-bit it is far too -much for any new team to consider (10 years development effort) and far +much for any new team to consider (10+ years development effort) and far outside of Embedded or Tablet/Desktop/Laptop power budgets. Thus bringing Power Scalar up-to-date to modern standards *and on its own merits* is a reasonable goal, and the advantages of the reduced focus is that -SFFS remains RISC-paradigm, and that lessons can be learned from other +SFFS remains RISC-paradigm, with lessons being learned from other ISAs from the intervening years. Good examples here include `bmask`. 
SVP64 Prefixing - also known by the terms "Zero-Overhead-Loop-Prefixing" diff --git a/openpower/sv/rfc/ls012/optable.csv b/openpower/sv/rfc/ls012/optable.csv index bd629d28c..3113ff5ec 100644 --- a/openpower/sv/rfc/ls012/optable.csv +++ b/openpower/sv/rfc/ls012/optable.csv @@ -1,26 +1,33 @@ op, rfc, priority, cost, SVP64, group, PO1, page, regs -# LD/ST-Postincrement (FP TODO) -lbzup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedload, 1R2W -lbzupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -lhzup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedload, 1R2W -lhzupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -lhaup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedload, 1R2W -lhaupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -lwzup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedload, 1R2W -lwzupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -lwaupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -ldup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedload, 1R2W -ldupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedload, 2R2W -stbup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedstore, 2R1W -stbupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedstore, 3R1W -sthup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedstore, 2R1W -sthupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedstore, 3R1W -stwup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedstore, 2R1W -stwupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedstore, 3R1W -stdup, ls011, high, PO, yes, EXT2xx, ??, isa/pifixedstore, 2R1W -stdupx, ls011, high, 10, yes, EXT2xx, ??, isa/pifixedstore, 3R1W -FP-LD, ls011, high, ??, yes, EXT2xx, ??, TODO, -FP-ST, ls011, high, ??, yes, EXT2xx, ??, TODO, +# LD/ST-Postincrement +lbzup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lbzupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +lhzup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lhzupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +lhaup, 
ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lhaupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +lwzup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lwzupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +lwaupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +ldup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +ldupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +stbup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +stbupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W +sthup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +sthupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W +stwup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +stwupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W +stdup, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +stdupx, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W +# FP LD/ST-Postincrement +lfdu, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lfsu, ls011, high, PO, yes, EXT2xx, no, isa/pifixedload, 1R2W +lfdux, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +lfsux, ls011, high, 10, yes, EXT2xx, no, isa/pifixedload, 2R2W +stfdu, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +stfsu, ls011, high, PO, yes, EXT2xx, no, isa/pifixedstore, 2R1W +stfdux, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W +stfsux, ls011, high, 10, yes, EXT2xx, no, isa/pifixedstore, 3R1W # Bitmanip LUT2/3 operations. high cost high reward grevlut, TBD, high, 3, yes, TBD, no, sv/bitmanip, 2R1W grevluti, TBD, high, 3, yes, TBD, yes, sv/bitmanip, 1R1W -- 2.30.2