1 # Copyright (c) 2023, Luke Kenneth Casson Leighton <lkcl@lkcl.net>
2 # Licensed under the LGPLv3+
3 # Funded by NLnet NGI-ASSURE under EU grant agreement No 957073.
4 # * https://nlnet.nl/project/LibreSOC-GigabitRouter/
5 # * https://bugs.libre-soc.org/show_bug.cgi?id=1157
6 # * Based on https://github.com/floodyberry/poly1305-donna (Public Domain)
7 """Implementation of Poly1305 authenticator for RFC 7539
8 Design principles are well-documented at:
9 https://loup-vaillant.fr/tutorials/poly1305-design
10 """
12 def divceil(a, b): return -(a // -b)
14 poly1305_block_size = 16
16 mask128 = (1<<128)-1
17 mask64 = (1<<64)-1
18 def _MUL(x, y): out = (x&mask64) * (y&mask64); print("mul %x*%x=%x" % (x, y, out)); return out
19 def _ADD(out, i): return (out + i)
20 def _ADDLO(out, i): return (out + (i & mask64))
21 def _SHR(i, shift): out = (i >> shift) & mask64; print("shr %x>>%d=%x mask %x" % (i,shift,out,mask64)); return out
22 def _LO(i): return i & mask64
25 # this function is extracted from bigint_cases.py (should be in a library)
26 # it is a python implementation of dsrd, see pseudocode in
27 # https://libre-soc.org/openpower/isa/svfixedarith/
28 def _DSRD(lo, hi, sh):
29 sh = sh % 64
30 v = lo << 64
31 v >>= sh
32 mask = ~((2 ** 64 - 1) >> sh)
33 v |= (hi & mask) << 64
34 hi = (v >> 64) % (2 ** 64)
35 lo = v % (2 ** 64)
36 return lo, hi
38 # interception function which allows analysis of carry-roll-over
39 intercepts = {}
41 def log(p1305, fn, result, args):
42 """intercept of mathematical primitives is recorded, so that
43 analysis is possible to find any carry-roll-over occurrences.
44 these we *assume* are when one of the add arguments is between
45 0 and say... 7?
46 """
47 name = fn.__name__[1:]
49 # rright. the 2nd parameter, if between the values 0 and 7,
50 # is assumed to be some sort of carry. everything else is ignored
51 arg1, arg2 = args
52 if arg2 > 7:
53 return
54 else: # only interested in adds for now
55 return
56 # begin hashing and adding this operation into the intercepts log
57 phash = hash(p1305)
58 info = (name, result, args)
59 key = hash(info)
60 logreport = "%5s %x <= " % (name, result)
61 logreport = logreport + " ".join(list(map(lambda x: "%x" % x, args)))
62 intercepts[key] = logreport
64 def intercept(p1305, args, fn):
65 result = fn(*args)
66 log(p1305, fn, result, args)
67 return result
70 class Ctx:
71 """A ContextManager for noting the inputs and outputs for interception.
72 The idea is to create unit tests with these same inputs and record
73 the expected outputs
74 """
76 def __init__(self, log, variables, inputs, outputs):
77 self.log = log
78 self.variables = variables
79 self.inputs = inputs
80 self.outputs = outputs
82 def print_vars(self, varnames):
83 for v in varnames:
84 print(" %s %s" % (v, repr(self.variables[v])))
86 def __enter__(self):
87 print("enter")
88 self.print_vars(self.inputs)
90 def __exit__(self, *args):
91 print("exit", args, self.outputs)
92 self.print_vars(self.outputs)
95 class Poly1305Donna(object):
97 """Poly1305 authenticator"""
99 P = 0x3fffffffffffffffffffffffffffffffb # 2^130-5
101 # suite of primitives (128-bit and 64-bit) which can be intercepted
102 # here in order to analyse carry-roll-over
103 def MUL(self, *args): return intercept(self, args, _MUL) # x,y
104 def ADD(self, *args): return intercept(self, args, _ADD) # out,i
105 def ADDLO(self, *args): return intercept(self, args, _ADDLO) # out,i
106 def SHR(self, *args): return intercept(self, args, _SHR) # i,shift
107 def LO(self, *args): return intercept(self, args, _LO) # i
108 def DSRD(self, *args): return intercept(self, args, _DSRD) # lo,hi,sh
110 @staticmethod
111 def le_bytes_to_num(data):
112 """Convert a number from little endian byte format"""
113 ret = 0
114 for i in range(len(data) - 1, -1, -1):
115 ret <<= 8
116 ret += data[i]
117 return ret
119 @staticmethod
120 def num_to_16_le_bytes(num):
121 """Convert number to 16 bytes in little endian format"""
122 ret = [0]*16
123 for i, _ in enumerate(ret):
124 ret[i] = num & 0xff
125 num >>= 8
126 return bytearray(ret)
128 def __init__(self, key):
129 """Set the authenticator key"""
130 if len(key) != 32:
131 raise ValueError("Key must be 256 bit long")
133 self.buffer = [0]*16
134 self.acc = 0
135 self.r = self.le_bytes_to_num(key[0:16])
136 self.r &= 0x0ffffffc0ffffffc0ffffffc0fffffff
137 self.s = self.le_bytes_to_num(key[16:32])
139 # r &= 0xffffffc0ffffffc0ffffffc0fffffff */
140 t = self.t = [0]*2
141 t[0] = self.le_bytes_to_num(key[0:8]) # t0 = U8TO64(&key[0]);
142 t[1] = self.le_bytes_to_num(key[8:16]) # t1 = U8TO64(&key[8]);
144 print ("init t %x %x" % (t[0], t[1]))
146 r = self.r = [0]*3
147 with Ctx("init r<-t", locals(), ["r"], ["t"]):
148 r[0] = ( t[0] ) & 0xffc0fffffff
149 r[1] = ((t[0] >> 44) | (t[1] << 20)) & 0xfffffc0ffff
150 r[2] = ((t[1] >> 24) ) & 0x00ffffffc0f
152 # h = 0 */
153 h = self.h = [0]*3
155 # save pad for later */
157 pad[0] = self.le_bytes_to_num(key[16:24])
158 pad[1] = self.le_bytes_to_num(key[24:32])
160 self.leftover = 0
161 self.final = 0
163 def poly1305_blocks(self, m):
165 # get local-names for math-primitives to look like poly1305-donna-64.h
166 MUL, ADD, ADDLO, SHR, LO = \
169 hibit = 0 if self.final else 1 << 40 # 1 << 128
170 #unsigned long long r0,r1,r2;
171 #unsigned long long s1,s2;
172 #unsigned long long h0,h1,h2;
173 #unsigned long long c;
174 #uint128_t d0,d1,d2,d;
176 r0 = self.r[0];
177 r1 = self.r[1];
178 r2 = self.r[2];
180 h0 = self.h[0];
181 h1 = self.h[1];
182 h2 = self.h[2];
184 s1 = r1 * (5 << 2);
185 s2 = r2 * (5 << 2);
187 print("blocks r %x %x %x" % (r0, r1, r2))
188 print("blocks h %x %x %x" % (h0, h1, h2))
189 print("blocks s %x %x" % (s1, s2))
191 while len(m) >= poly1305_block_size:
192 #unsigned long long t0,t1;
194 #/* h += m[i] */
195 t0 = self.le_bytes_to_num(m[0:8])
196 t1 = self.le_bytes_to_num(m[8:16])
198 print(" loop t %x %x" % (t0, t1))
200 h0 += (( t0 ) & 0xfffffffffff);
201 h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
202 h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
204 print(" loop h+t %x %x %x" % (h0, h1, h2))
206 #/* h *= r */
207 d0=MUL(h0,r0); d=MUL(h1,s2);
208 print(" h*=r d0 d %x %x" % (d0, d))
209 d0+=d; d=MUL(h2,s1); d0+=d;
212 print(" after h*=r d0 d1 d2 %x %x %x %x" % (d0, d1, d2, d))
214 #/* (partial) h %= p */
215 c = 0
216 d0 = ADDLO(d0,c); c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
217 d1 = ADDLO(d1,c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
218 d2 = ADDLO(d2,c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
219 h0 += MUL(c, 5); c = (h0 >> 44) ; h0 = h0 & 0xfffffffffff;
220 h1 += MUL(c, 1);
222 m = m[poly1305_block_size:]
224 self.h[0] = h0;
225 self.h[1] = h1;
226 self.h[2] = h2;
228 def poly1305_finish(self):
230 # get local-names for math-primitives to look like poly1305-donna-64.h
231 MUL, ADD, ADDLO, SHR, LO = \
234 #unsigned long long h0,h1,h2,c;
235 #unsigned long long g0,g1,g2;
236 #unsigned long long t0,t1;
238 #/* process the remaining block */
239 if self.leftover:
240 i = self.leftover;
241 self.buffer[i] = 1;
242 for i in range(i+1, poly1305_block_size):
243 self.buffer[i] = 0;
244 self.final = 1;
245 self.poly1305_blocks(self.buffer)
247 f3, ff = 0x3ffffffffff, 0xfffffffffff
249 #/* fully carry h */
250 h0 = self.h[0];
251 h1 = self.h[1];
252 h2 = self.h[2];
254 print("finish %x %x %x" % (h0, h1, h2))
256 # commented-out from the original (left in for comparison),
257 # see https://bugs.libre-soc.org/show_bug.cgi?id=1157#c3
258 # as to what is going on here
260 #c = 0
261 #h1 += c; c = (h1 >> 44); h1 &= ff;
262 #h2 += c; c = (h2 >> 42); h2 &= f3;
263 #h0 += c * 5; c = (h0 >> 44); h0 &= ff;
264 #h1 += c; c = (h1 >> 44); h1 &= ff;
265 #h2 += c; c = (h2 >> 42); h2 &= f3;
266 #h0 += c * 5; c = (h0 >> 44); h0 &= ff;
267 #h1 += c;
269 # okaaay, first "preparation" for conversion to SVP64 REMAP/Indexed:
270 # extract the constants/indices from the original above and look for the
271 # common pattern, which is:
272 # h? += c * ?; c = (h? >> ??); h? &= ??;
274 # these appear to be repeated twice
275 idxconsts = [ # hN c* shf
276 [1, 1, 44],
277 [2, 1, 42],
278 [0, 5, 44]
279 ]
280 c = 0 # start with carry=0
281 for hidx, cmul, shf in idxconsts*2: # repeat the pattern twice
282 self.h[hidx] += MUL(c, cmul) # don't worry about *1
283 c = self.h[hidx] >> shf # these two could use dsrd
284 self.h[hidx] &= (1<<shf) - 1 # (one instruction)
285 self.h[1] += c; # can't have everything...
287 h0, h1, h2 = self.h
289 print(" h0-2 %x %x %x" % (h0, h1, h2))
291 #/* compute h + -p */
292 c = 5
293 g0 = ADD(h0, c); c = (g0 >> 44); g0 &= ff;
294 g1 = ADD(h1, c); c = (g1 >> 44); g1 &= ff;
295 g2 = (ADD(h2, c) - (1 << 42)) & mask64
297 print(" g0-2 %x %x %x" % (g0, g1, g2))
299 #/* select h if h < p, or h + -p if h >= p */
300 c = (g2 >> ((8 * 8) - 1)) - 1;
301 print(" c %x" % c)
302 g0 &= c;
303 g1 &= c;
304 g2 &= c;
305 c = ~c;
306 h0 = (h0 & c) | g0;
307 h1 = (h1 & c) | g1;
308 h2 = (h2 & c) | g2;
310 #/* h = (h + pad) */
311 t0 = self.pad[0];
312 t1 = self.pad[1];
314 h0 += ADD(( t0 ) & ff, 0); c = (h0 >> 44); h0 &= ff;
315 h1 += ADD(((t0 >> 44) | (t1 << 20)) & ff, c); c = (h1 >> 44); h1 &= ff;
316 h2 += ADD(((t1 >> 24) ) & f3, c); h2 &= f3;
318 #/* mac = h % (2^128) */
319 h0 = ((h0 ) | (h1 << 44));
320 h1 = ((h1 >> 20) | (h2 << 24));
322 mac = [0]*16
323 mac[0:8] = self.num_to_16_le_bytes(h0)[0:8]
324 mac[8:16] = self.num_to_16_le_bytes(h1)[0:8]
326 # /* zero out the state */
327 self.h[0] = 0;
328 self.h[1] = 0;
329 self.h[2] = 0;
330 self.r[0] = 0;
331 self.r[1] = 0;
332 self.r[2] = 0;
333 self.pad[0] = 0;
334 self.pad[1] = 0;
336 return bytearray(mac)
338 def poly1305_update(self, m):
340 #/* handle leftover */
341 if (self.leftover):
342 want = (poly1305_block_size - self.leftover);
343 if (want > len(m)):
344 want = len(m);
345 for i in range(want):
346 self.buffer[self.leftover + i] = m[i];
347 m = m[want:]
348 self.leftover += want;
349 if (self.leftover < poly1305_block_size):
350 return;
351 self.poly1305_blocks(self.buffer);
352 self.leftover = 0;
354 # /* process full blocks */
355 if (len(m) >= poly1305_block_size):
356 want = (len(m) & ~(poly1305_block_size - 1));
357 self.poly1305_blocks(m[:want]);
358 m = m[want:]
360 # /* store leftover */
361 for i in range(len(m)):
362 self.buffer[self.leftover + i] = m[i];
363 self.leftover += len(m);
365 def create_tag(self, data):
366 """Calculate authentication tag for data"""
367 self.poly1305_update(data)
368 return self.poly1305_finish()
371 # quick usage demo, make identical to poly1305-donna/example-poly1305.c
372 if __name__ == '__main__':
373 key = list(range(221, 253))
374 mac = Poly1305Donna(key).create_tag(bytearray(range(121,121+73)))
375 print("result hash:", end=" ")
376 for byte in mac:
377 print(hex(byte)[2:], sep='', end='')
378 print()
380 # print out the intercepts
381 for intercept in intercepts.values():
382 print (intercept)
384 expected = [0xdd,0xb9,0xda,0x7d,0xdd,0x5e,0x52,0x79,
385 0x27,0x30,0xed,0x5c,0xda,0x5f,0x90,0xa4]
386 assert mac == bytearray(expected)