illustrate the intermediary step of converting poly1305-donna.py
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 17 Sep 2023 18:42:22 +0000 (19:42 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Sun, 17 Sep 2023 18:42:25 +0000 (19:42 +0100)
to a form that is "reasonably close" to how the SVP64 assembler,
using REMAP Indexed, would work.
See https://bugs.libre-soc.org/show_bug.cgi?id=1157#c3 for details.

src/openpower/decoder/isa/poly1305-donna.py

index 91c6e7f8a34e7e5d88f8d83760e82422ca45a901..66e4c4a11bc137fede82efee5891e05738ab5bae 100644 (file)
@@ -158,14 +158,38 @@ class Poly1305Donna(object):
 
         print("finish %x %x %x" % (h0, h1, h2))
 
-        c = 0
-        h1 += c;     c = (h1 >> 44); h1 &= ff;
-        h2 += c;     c = (h2 >> 42); h2 &= f3;
-        h0 += c * 5; c = (h0 >> 44); h0 &= ff;
-        h1 += c;     c = (h1 >> 44); h1 &= ff;
-        h2 += c;     c = (h2 >> 42); h2 &= f3;
-        h0 += c * 5; c = (h0 >> 44); h0 &= ff;
-        h1 += c;
+        # commented-out from the original (left in for comparison),
+        # see https://bugs.libre-soc.org/show_bug.cgi?id=1157#c3
+        # as to what is going on here
+
+        #c = 0
+        #h1 += c;     c = (h1 >> 44); h1 &= ff;
+        #h2 += c;     c = (h2 >> 42); h2 &= f3;
+        #h0 += c * 5; c = (h0 >> 44); h0 &= ff;
+        #h1 += c;     c = (h1 >> 44); h1 &= ff;
+        #h2 += c;     c = (h2 >> 42); h2 &= f3;
+        #h0 += c * 5; c = (h0 >> 44); h0 &= ff;
+        #h1 += c;
+
+        # okaaay, first "preparation" for conversion to SVP64 REMAP/Indexed:
+        # extract the constants/indices from the original above and look for the
+        # common pattern, which is:
+        # h? += c * ?; c = (h? >> ??); h? &= ??;
+
+        # these appear to be repeated twice
+        idxconsts = [ # hN c* shf
+                       [1, 1, 44],
+                       [2, 1, 42],
+                       [0, 5, 44]  # c * 5, as in the original h0 lines
+                    ]
+        c = 0 # start with carry=0
+        for hidx, cmul, shf in idxconsts*2: # repeat the pattern twice
+            self.h[hidx] += c * cmul        # don't worry about *1
+            c = self.h[hidx] >> shf         # these two could use dsrd
+            self.h[hidx] &= (1<<shf) - 1    # (one instruction)
+        self.h[1] += c; # can't have everything...
+
+        h0, h1, h2 = self.h
 
         print("    h0-2 %x %x %x" % (h0, h1, h2))