From: Konstantinos Margaritis Date: Thu, 4 May 2023 14:11:31 +0000 (+0000) Subject: Add 2 more instructions to help with 2-coeff butterfly X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=31dcf687d8c99c06f6015ffdb6e69d6ac804975d;p=openpower-isa.git Add 2 more instructions to help with 2-coeff butterfly: fdct_round_shift(a*c1 +/- b*c2). They are meant to be used as a complement to maddsubrs, so this calculation can now be done in 3 instructions. Added some unit tests to demonstrate the operation. --- diff --git a/openpower/isa/butterfly.mdwn b/openpower/isa/butterfly.mdwn index 43c5c48f..ec106793 100644 --- a/openpower/isa/butterfly.mdwn +++ b/openpower/isa/butterfly.mdwn @@ -38,3 +38,59 @@ Pseudo-code: Special Registers Altered: None + +# [DRAFT] Integer Butterfly Multiply Add and Accumulate FFT/DCT + +A-Form + +* maddrs RT,RA,SH,RB + +Pseudo-code: + + n <- SH + prod <- MULS(RB, RA) + prod_lo <- prod[XLEN:(XLEN*2)-1] + if n = 0 then + RT <- (RT) + prod_lo + else + res <- (RT) + prod_lo + round <- [0]*XLEN + round[XLEN -n] <- 1 + res <- res + round + signbit <- res[0] + m <- MASK(n, (XLEN-1)) + res <- ROTL64(res, XLEN-n) & m + smask <- ([signbit]*XLEN) & ¬m + RT <- (res | smask) + +Special Registers Altered: + + None + +# [DRAFT] Integer Butterfly Multiply Subtract From FFT/DCT + +A-Form + +* msubrs RT,RA,SH,RB + +Pseudo-code: + + n <- SH + prod <- MULS(RB, RA) + prod_lo <- prod[XLEN:(XLEN*2)-1] + if n = 0 then + RT <- (RT) - prod_lo + else + res <- (RT) - prod_lo + round <- [0]*XLEN + round[XLEN -n] <- 1 + res <- res + round + signbit <- res[0] + m <- MASK(n, (XLEN-1)) + res <- ROTL64(res, XLEN-n) & m + smask <- ([signbit]*XLEN) & ¬m + RT <- (res | smask) + +Special Registers Altered: + + None diff --git a/openpower/isatables/RM-1P-2S1D.csv b/openpower/isatables/RM-1P-2S1D.csv index 05b756f5..4b91a4d2 100644 --- a/openpower/isatables/RM-1P-2S1D.csv +++ b/openpower/isatables/RM-1P-2S1D.csv @@ -121,5 +121,4 @@ 
fremainder,NORMAL,,1P,EXTRA3,NO,d:FRT;d:CR1,s:FRA,s:FRB,0,FRA,FRB,0,FRT,0,CR1,0 fpowr,NORMAL,,1P,EXTRA3,NO,d:FRT;d:CR1,s:FRA,s:FRB,0,FRA,FRB,0,FRT,0,CR1,0 fpow,NORMAL,,1P,EXTRA3,NO,d:FRT;d:CR1,s:FRA,s:FRB,0,FRA,FRB,0,FRT,0,CR1,0 rlwimi,NORMAL,,1P,EXTRA3,NO,d:RA;d:CR0,s:RA,s:RS,0,RA,0,RS,RA,0,CR0,0 -maddsubrs,NORMAL,,1P,EXTRA3,NO,TODO,0,0,0,RA,0,RB,RT,0,CR0,0 rldimi,NORMAL,,1P,EXTRA3,NO,d:RA;d:CR0,s:RA,s:RS,0,RA,0,RS,RA,0,CR0,0 diff --git a/openpower/isatables/minor_22.csv b/openpower/isatables/minor_22.csv index a918a16a..1ba448b7 100644 --- a/openpower/isatables/minor_22.csv +++ b/openpower/isatables/minor_22.csv @@ -40,4 +40,6 @@ opcode,unit,internal op,in1,in2,in3,out,CR in,CR out,inv A,inv out,cry in,cry ou ------10001,ALU,OP_BMASK,RA,RB,NONE,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,bmask,BM2,,1,unofficial until submitted and approved/renumbered by the opf isa wg -----00011-,ALU,OP_FMVIS,NONE,CONST_UI,NONE,FRS,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,fmvis,DX,,1,unofficial until submitted and approved/renumbered by the opf isa wg -----01011-,ALU,OP_FISHMV,FRS,CONST_UI,NONE,FRS,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,0,0,NONE,0,0,fishmv,DX,,1,unofficial until submitted and approved/renumbered by the opf isa wg -------01000,ALU,OP_MADDSUBRS,RA,CONST_SH,RB,RT,NONE,CR0,0,0,ZERO,0,NONE,0,0,0,0,1,0,RC_ONLY,0,0,maddsubrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg +------01000,ALU,OP_MADDSUBRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,maddsubrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg +------01001,ALU,OP_MADDRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,maddrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg +------01010,ALU,OP_MSUBRS,RA,CONST_SH,RB,RT,NONE,NONE,0,0,ZERO,0,NONE,0,0,0,0,1,0,NONE,0,0,msubrs,A,,1,unofficial until submitted and approved/renumbered by the opf isa wg diff --git a/src/openpower/decoder/isa/caller.py 
b/src/openpower/decoder/isa/caller.py index f5657518..101a2ad0 100644 --- a/src/openpower/decoder/isa/caller.py +++ b/src/openpower/decoder/isa/caller.py @@ -1920,7 +1920,7 @@ class ISACaller(ISACallerHelper, ISAFPHelpers, StepLoop): "fmvtg", "fmvtgs", "fcvtfg", "fcvtfgs", "fmvfg", "fmvfgs", - "maddsubrs" + "maddsubrs", "maddrs", "msubrs" ]: illegal = False ins_name = dotstrp diff --git a/src/openpower/decoder/power_decoder2.py b/src/openpower/decoder/power_decoder2.py index a243d825..2e06b5d7 100644 --- a/src/openpower/decoder/power_decoder2.py +++ b/src/openpower/decoder/power_decoder2.py @@ -1082,6 +1082,7 @@ class PowerDecodeSubset(Elaboratable): # implicit RS for major 22, integer maddsubrs with m.If((major == 22) & xo6.matches( '-01000', # maddsubrs + '-01001', # maddrs )): comb += self.implicit_rs.eq(1) comb += self.extend_rb_maxvl.eq(1) # extend RB diff --git a/src/openpower/decoder/power_enums.py b/src/openpower/decoder/power_enums.py index a7b1f5b4..ebd23321 100644 --- a/src/openpower/decoder/power_enums.py +++ b/src/openpower/decoder/power_enums.py @@ -756,7 +756,9 @@ _insns = [ # "lwzbr", "lwzubr", # more load word SVP64 bit-reversed "maddedu", "maddedus", "maddhd", "maddhdu", "maddld", # INT multiply-and-add - "maddsubrs", # Integer DCT Butterfly + "maddsubrs", # Integer DCT Butterfly Add Sub and Round Shift + "maddrs", # Integer DCT Butterfly Add and Accumulate and Round Shift + "msubrs", # Integer DCT Butterfly Subtract From and Round Shift "mcrf", "mcrxr", "mcrxrx", "mfcr/mfocrf", # CR mvs "mfmsr", "mfspr", "minmax", # AV bitmanip @@ -919,6 +921,8 @@ class MicrOp(Enum): OP_DSHR = 102 OP_SHADD = 103 OP_MADDSUBRS = 104 + OP_MADDRS = 105 + OP_MSUBRS = 106 class In1Sel(Enum): diff --git a/src/openpower/test/alu/maddsubrs_cases.py b/src/openpower/test/alu/maddsubrs_cases.py index 2a495f60..f4433fcb 100644 --- a/src/openpower/test/alu/maddsubrs_cases.py +++ b/src/openpower/test/alu/maddsubrs_cases.py @@ -13,76 +13,121 @@ import unittest class 
MADDSUBRSTestCase(TestAccumulatorBase): def case_0_maddsubrs(self): - isa = SVP64Asm(["maddsubrs 1,2,14,3"]) + isa = SVP64Asm(["maddsubrs 1,10,14,11"]) lst = list(isa) initial_regs = [0] * 32 initial_regs[1] = 0x00000a71 - initial_regs[2] = 0x0000e6b8 - initial_regs[3] = 0x00002d41 + initial_regs[10] = 0x0000e6b8 + initial_regs[11] = 0x00002d41 e = ExpectedState(pc=4) e.intregs[1] = 0x0000aa86 e.intregs[2] = 0xffffffffffff643e - e.intregs[3] = 0x00002d41 + e.intregs[10] = 0x0000e6b8 + e.intregs[11] = 0x00002d41 self.add_case(Program(lst, bigendian), initial_regs, expected=e) def case_1_maddsubrs(self): - isa = SVP64Asm(["maddsubrs 1,2,0,3"]) + isa = SVP64Asm(["maddsubrs 1,10,0,11"]) lst = list(isa) initial_regs = [0] * 32 initial_regs[1] = 0x00000a71 - initial_regs[2] = 0x0000e6b8 - initial_regs[3] = 0x00002d41 + initial_regs[10] = 0x0000e6b8 + initial_regs[11] = 0x00002d41 e = ExpectedState(pc=4) e.intregs[1] = 0x2aa17069 e.intregs[2] = 0xffffffffd90f96f9 - e.intregs[3] = 0x00002d41 + e.intregs[10] = 0x0000e6b8 + e.intregs[11] = 0x00002d41 self.add_case(Program(lst, bigendian), initial_regs, expected=e) def case_2_maddsubrs(self): - isa = SVP64Asm(["maddsubrs 1,2,2,3"]) + isa = SVP64Asm(["maddsubrs 1,10,2,11"]) lst = list(isa) initial_regs = [0] * 32 initial_regs[1] = 0x100000000 - initial_regs[2] = 0x000000003 - initial_regs[3] = 0x10000000 + initial_regs[10] = 0x000000003 + initial_regs[11] = 0x10000000 e = ExpectedState(pc=4) e.intregs[1] = 0x40000000c000000; e.intregs[2] = 0x3fffffff4000000; - e.intregs[3] = 0x10000000; + e.intregs[10] = 0x00000003 + e.intregs[11] = 0x10000000; self.add_case(Program(lst, bigendian), initial_regs, expected=e) def case_3_maddsubrs(self): - isa = SVP64Asm(["maddsubrs 1,2,16,3"]) + isa = SVP64Asm(["maddsubrs 1,10,16,11"]) lst = list(isa) initial_regs = [0] * 32 initial_regs[1] = 0x100000000 - initial_regs[2] = 0x000000003 - initial_regs[3] = 0x10000000 + initial_regs[10] = 0x000000003 + initial_regs[11] = 0x10000000 e = 
ExpectedState(pc=4) e.intregs[1] = 0x100000003000; e.intregs[2] = 0x0fffffffd000; - e.intregs[3] = 0x10000000; + e.intregs[10] = 0x00000003 + e.intregs[11] = 0x10000000; self.add_case(Program(lst, bigendian), initial_regs, expected=e) - def case_3_maddsubrs(self): - isa = SVP64Asm(["maddsubrs 1,2,1,3"]) + def case_4_maddsubrs(self): + isa = SVP64Asm(["maddsubrs 1,10,1,11"]) lst = list(isa) initial_regs = [0] * 32 initial_regs[1] = 0x100000000 - initial_regs[2] = 0x000000003 - initial_regs[3] = 0xff0000000 + initial_regs[10] = 0x000000003 + initial_regs[11] = 0xff0000000 e = ExpectedState(pc=4) e.intregs[1] = 0xf8000017e8000000; e.intregs[2] = 0xf7ffffe818000000; - e.intregs[3] = 0xff0000000; + e.intregs[10] = 0x000000003 + e.intregs[11] = 0xff0000000; + self.add_case(Program(lst, bigendian), initial_regs, expected=e) + + def case_0_maddrs(self): + isa = SVP64Asm(["maddsubrs 1,10,0,11", + "maddrs 1,10,0,12", + "msubrs 2,10,0,12"]) + lst = list(isa) + + initial_regs = [0] * 32 + initial_regs[1] = 0x00000a71 + initial_regs[10] = 0x0000e6b8 + initial_regs[11] = 0x00002d41 + initial_regs[12] = 0x00000d00 + + e = ExpectedState(pc=12) + e.intregs[1] = 0x3658c869 + e.intregs[2] = 0xffffffffcd583ef9 + e.intregs[10] = 0x0000e6b8 + e.intregs[11] = 0x00002d41 + e.intregs[12] = 0x00000d00 + self.add_case(Program(lst, bigendian), initial_regs, expected=e) + + def case_1_maddrs(self): + isa = SVP64Asm(["maddsubrs 1,10,0,11", + "maddrs 1,10,14,12", + "msubrs 2,10,14,12"]) + lst = list(isa) + + initial_regs = [0] * 32 + initial_regs[1] = 0x00000a71 + initial_regs[10] = 0x0000e6b8 + initial_regs[11] = 0x00002d41 + initial_regs[12] = 0x00000d00 + + e = ExpectedState(pc=12) + e.intregs[1] = 0x0000d963 + e.intregs[2] = 0xffffffffffff3561 + e.intregs[10] = 0x0000e6b8 + e.intregs[11] = 0x00002d41 + e.intregs[12] = 0x00000d00 self.add_case(Program(lst, bigendian), initial_regs, expected=e)