1 /*
2 * yosys -- Yosys Open SYnthesis Suite
3 *
4 * Copyright (C) 2019-2020 whitequark <whitequark@whitequark.org>
5 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 */
18
19 // This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself.
20 //
21 // The CXXRTL support library implements compile-time specialized arbitrary-width arithmetic, and provides
22 // composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass
23 // to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler
24 // to unwrap the abstraction and generate efficient code.
25
26 #ifndef CXXRTL_H
27 #define CXXRTL_H
28
29 #include <cstddef>
30 #include <cstdint>
31 #include <cassert>
32 #include <limits>
33 #include <type_traits>
34 #include <tuple>
35 #include <vector>
36 #include <map>
37 #include <algorithm>
38 #include <memory>
39 #include <functional>
40 #include <sstream>
41
42 #include <backends/cxxrtl/cxxrtl_capi.h>
43
44 #ifndef __has_attribute
45 # define __has_attribute(x) 0
46 #endif
47
48 // CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
49 // It generates a lot of specialized template functions with relatively large bodies that, when inlined
50 // into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
51 // Because of this, most of the CXXRTL runtime must always be inlined for best performance.
52 #if __has_attribute(always_inline)
53 #define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
54 #else
55 #define CXXRTL_ALWAYS_INLINE inline
56 #endif
57 // Conversely, some functions in the generated code are extremely large yet very cold, with both of these
58 // properties being extreme enough to confuse C++ compilers into spending pathological amounts of time
59 // on a futile attempt (the resulting code actually becomes worse) to optimize the least important parts of the code.
60 #if __has_attribute(optnone)
61 #define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
62 #elif __has_attribute(optimize)
63 #define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
64 #else
65 #define CXXRTL_EXTREMELY_COLD
66 #endif
67
68 // CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
69 // of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
70 // most result in undefined simulation results).
71 //
72 // By default, CXXRTL_ASSERT() expands to assert(), but it may be overridden, e.g. when integrating
73 // the simulation into another process that should survive violations of RTL contracts.
74 #ifndef CXXRTL_ASSERT
75 #ifndef CXXRTL_NDEBUG
76 #define CXXRTL_ASSERT(x) assert(x)
77 #else
78 #define CXXRTL_ASSERT(x)
79 #endif
80 #endif
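// A minimal sketch of such an override, assuming a hypothetical host-side handler
// handle_rtl_violation() that is defined before this header is included:
//   #define CXXRTL_ASSERT(x) do { if (!(x)) handle_rtl_violation(#x); } while (0)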
81
82 namespace cxxrtl {
83
84 // All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. The chunk size
85 // is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving
86 // and introspecting the simulation in Python.
87 //
88 // It is practical to use chunk sizes between 32 bits and the platform register size, because when arithmetic on
89 // narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register.
90 // However, (a) most of our operations do not change those bits in the first place because of invariants that are
91 // invisible to the compiler, and (b) we often operate on non-power-of-2 values and have to clear the high bits anyway.
92 // Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
93 // clobbered results in simpler generated code.
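//
// For example, a 40-bit value is stored as two 32-bit chunks; only the low 8 bits of the most significant
// chunk are used, and the code keeps the unused high bits of that chunk cleared.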
94 typedef uint32_t chunk_t;
95 typedef uint64_t wide_chunk_t;
96
97 template<typename T>
98 struct chunk_traits {
99 static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
100 "chunk type must be an unsigned integral type");
101 using type = T;
102 static constexpr size_t bits = std::numeric_limits<T>::digits;
103 static constexpr T mask = std::numeric_limits<T>::max();
104 };
105
106 template<class T>
107 struct expr_base;
108
109 template<size_t Bits>
110 struct value : public expr_base<value<Bits>> {
111 static constexpr size_t bits = Bits;
112
113 using chunk = chunk_traits<chunk_t>;
114 static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask
115 : chunk::mask >> (chunk::bits - (Bits % chunk::bits));
116
117 static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits;
118 chunk::type data[chunks] = {};
119
120 value() = default;
121 template<typename... Init>
122 explicit constexpr value(Init ...init) : data{init...} {}
123
124 value(const value<Bits> &) = default;
125 value<Bits> &operator=(const value<Bits> &) = default;
126
127 value(value<Bits> &&) = default;
128 value<Bits> &operator=(value<Bits> &&) = default;
129
130 // A (no-op) helper that forces the cast to value<>.
131 CXXRTL_ALWAYS_INLINE
132 const value<Bits> &val() const {
133 return *this;
134 }
135
136 std::string str() const {
137 std::stringstream ss;
138 ss << *this;
139 return ss.str();
140 }
141
142 // Conversion operations.
143 //
144 // These functions ensure that a conversion is never out of range, and should always be used, if at all
145 // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and
146 // .concat() can be used to split them into more manageable parts.
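//
// A minimal usage sketch (the width and values are illustrative):
//   value<16> v;
//   v.set<uint16_t>(0xabcd);        // load the value from a host integer
//   uint16_t x = v.get<uint16_t>(); // read it back; x == 0xabcd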
147 template<class IntegerT>
148 CXXRTL_ALWAYS_INLINE
149 IntegerT get() const {
150 static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
151 "get<T>() requires T to be an unsigned integral type");
152 static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
153 "get<T>() requires T to be at least as wide as the value is");
154 IntegerT result = 0;
155 for (size_t n = 0; n < chunks; n++)
156 result |= IntegerT(data[n]) << (n * chunk::bits);
157 return result;
158 }
159
160 template<class IntegerT>
161 CXXRTL_ALWAYS_INLINE
162 void set(IntegerT other) {
163 static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
164 "set<T>() requires T to be an unsigned integral type");
165 static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
166 "set<T>() requires the value to be at least as wide as T is");
167 for (size_t n = 0; n < chunks; n++)
168 data[n] = (other >> (n * chunk::bits)) & chunk::mask;
169 }
170
171 // Operations with compile-time parameters.
172 //
173 // These operations are used to implement slicing, concatenation, and blitting.
174 // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left);
175 // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right).
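//
// For example (an illustrative 8-bit value 0b10110011):
//   .trunc<4>()  keeps the 4 least significant bits -> 0b0011
//   .rtrunc<4>() keeps the 4 most significant bits  -> 0b1011
//   .zext<12>()  prepends zero bits on the left; .rzext<12>() appends zero bits on the right.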
176 template<size_t NewBits>
177 CXXRTL_ALWAYS_INLINE
178 value<NewBits> trunc() const {
179 static_assert(NewBits <= Bits, "trunc() may not increase width");
180 value<NewBits> result;
181 for (size_t n = 0; n < result.chunks; n++)
182 result.data[n] = data[n];
183 result.data[result.chunks - 1] &= result.msb_mask;
184 return result;
185 }
186
187 template<size_t NewBits>
188 CXXRTL_ALWAYS_INLINE
189 value<NewBits> zext() const {
190 static_assert(NewBits >= Bits, "zext() may not decrease width");
191 value<NewBits> result;
192 for (size_t n = 0; n < chunks; n++)
193 result.data[n] = data[n];
194 return result;
195 }
196
197 template<size_t NewBits>
198 CXXRTL_ALWAYS_INLINE
199 value<NewBits> sext() const {
200 static_assert(NewBits >= Bits, "sext() may not decrease width");
201 value<NewBits> result;
202 for (size_t n = 0; n < chunks; n++)
203 result.data[n] = data[n];
204 if (is_neg()) {
205 result.data[chunks - 1] |= ~msb_mask;
206 for (size_t n = chunks; n < result.chunks; n++)
207 result.data[n] = chunk::mask;
208 result.data[result.chunks - 1] &= result.msb_mask;
209 }
210 return result;
211 }
212
213 template<size_t NewBits>
214 CXXRTL_ALWAYS_INLINE
215 value<NewBits> rtrunc() const {
216 static_assert(NewBits <= Bits, "rtrunc() may not increase width");
217 value<NewBits> result;
218 constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits;
219 constexpr size_t shift_bits = (Bits - NewBits) % chunk::bits;
220 chunk::type carry = 0;
221 if (shift_chunks + result.chunks < chunks) {
222 carry = (shift_bits == 0) ? 0
223 : data[shift_chunks + result.chunks] << (chunk::bits - shift_bits);
224 }
225 for (size_t n = result.chunks; n > 0; n--) {
226 result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits);
227 carry = (shift_bits == 0) ? 0
228 : data[shift_chunks + n - 1] << (chunk::bits - shift_bits);
229 }
230 return result;
231 }
232
233 template<size_t NewBits>
234 CXXRTL_ALWAYS_INLINE
235 value<NewBits> rzext() const {
236 static_assert(NewBits >= Bits, "rzext() may not decrease width");
237 value<NewBits> result;
238 constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits;
239 constexpr size_t shift_bits = (NewBits - Bits) % chunk::bits;
240 chunk::type carry = 0;
241 for (size_t n = 0; n < chunks; n++) {
242 result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
243 carry = (shift_bits == 0) ? 0
244 : data[n] >> (chunk::bits - shift_bits);
245 }
246 if (shift_chunks + chunks < result.chunks)
247 result.data[shift_chunks + chunks] = carry;
248 return result;
249 }
250
251 // Bit blit operation, i.e. a partial read-modify-write.
252 template<size_t Stop, size_t Start>
253 CXXRTL_ALWAYS_INLINE
254 value<Bits> blit(const value<Stop - Start + 1> &source) const {
255 static_assert(Stop >= Start, "blit() may not reverse bit order");
256 constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits));
257 constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0
258 : (chunk::mask << (Stop % chunk::bits + 1));
259 value<Bits> masked = *this;
260 if (Start / chunk::bits == Stop / chunk::bits) {
261 masked.data[Start / chunk::bits] &= stop_mask | start_mask;
262 } else {
263 masked.data[Start / chunk::bits] &= start_mask;
264 for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++)
265 masked.data[n] = 0;
266 masked.data[Stop / chunk::bits] &= stop_mask;
267 }
268 value<Bits> shifted = source
269 .template rzext<Stop + 1>()
270 .template zext<Bits>();
271 return masked.bit_or(shifted);
272 }
273
274 // Helpers for selecting an extending or truncating operation depending on whether the result is wider or narrower
275 // than the operand. In C++17 these can be replaced with `if constexpr`.
276 template<size_t NewBits, typename = void>
277 struct zext_cast {
278 CXXRTL_ALWAYS_INLINE
279 value<NewBits> operator()(const value<Bits> &val) {
280 return val.template zext<NewBits>();
281 }
282 };
283
284 template<size_t NewBits>
285 struct zext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
286 CXXRTL_ALWAYS_INLINE
287 value<NewBits> operator()(const value<Bits> &val) {
288 return val.template trunc<NewBits>();
289 }
290 };
291
292 template<size_t NewBits, typename = void>
293 struct sext_cast {
294 CXXRTL_ALWAYS_INLINE
295 value<NewBits> operator()(const value<Bits> &val) {
296 return val.template sext<NewBits>();
297 }
298 };
299
300 template<size_t NewBits>
301 struct sext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
302 CXXRTL_ALWAYS_INLINE
303 value<NewBits> operator()(const value<Bits> &val) {
304 return val.template trunc<NewBits>();
305 }
306 };
307
308 template<size_t NewBits>
309 CXXRTL_ALWAYS_INLINE
310 value<NewBits> zcast() const {
311 return zext_cast<NewBits>()(*this);
312 }
313
314 template<size_t NewBits>
315 CXXRTL_ALWAYS_INLINE
316 value<NewBits> scast() const {
317 return sext_cast<NewBits>()(*this);
318 }
319
320 // Operations with run-time parameters (offsets, amounts, etc.).
321 //
322 // These operations are used for computations.
323 bool bit(size_t offset) const {
324 return data[offset / chunk::bits] & (1 << (offset % chunk::bits));
325 }
326
327 void set_bit(size_t offset, bool value = true) {
328 size_t offset_chunks = offset / chunk::bits;
329 size_t offset_bits = offset % chunk::bits;
330 data[offset_chunks] &= ~(1 << offset_bits);
331 data[offset_chunks] |= value ? 1 << offset_bits : 0;
332 }
333
334 explicit operator bool() const {
335 return !is_zero();
336 }
337
338 bool is_zero() const {
339 for (size_t n = 0; n < chunks; n++)
340 if (data[n] != 0)
341 return false;
342 return true;
343 }
344
345 bool is_neg() const {
346 return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits));
347 }
348
349 bool operator ==(const value<Bits> &other) const {
350 for (size_t n = 0; n < chunks; n++)
351 if (data[n] != other.data[n])
352 return false;
353 return true;
354 }
355
356 bool operator !=(const value<Bits> &other) const {
357 return !(*this == other);
358 }
359
360 value<Bits> bit_not() const {
361 value<Bits> result;
362 for (size_t n = 0; n < chunks; n++)
363 result.data[n] = ~data[n];
364 result.data[chunks - 1] &= msb_mask;
365 return result;
366 }
367
368 value<Bits> bit_and(const value<Bits> &other) const {
369 value<Bits> result;
370 for (size_t n = 0; n < chunks; n++)
371 result.data[n] = data[n] & other.data[n];
372 return result;
373 }
374
375 value<Bits> bit_or(const value<Bits> &other) const {
376 value<Bits> result;
377 for (size_t n = 0; n < chunks; n++)
378 result.data[n] = data[n] | other.data[n];
379 return result;
380 }
381
382 value<Bits> bit_xor(const value<Bits> &other) const {
383 value<Bits> result;
384 for (size_t n = 0; n < chunks; n++)
385 result.data[n] = data[n] ^ other.data[n];
386 return result;
387 }
388
389 value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
390 return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
391 }
392
393 template<size_t AmountBits>
394 value<Bits> shl(const value<AmountBits> &amount) const {
395 // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
396 static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported");
397 // Detect shifts that are definitely larger than Bits early.
398 for (size_t n = 1; n < amount.chunks; n++)
399 if (amount.data[n] != 0)
400 return {};
401 // Past this point we can use the least significant chunk as the shift size.
402 size_t shift_chunks = amount.data[0] / chunk::bits;
403 size_t shift_bits = amount.data[0] % chunk::bits;
404 if (shift_chunks >= chunks)
405 return {};
406 value<Bits> result;
407 chunk::type carry = 0;
408 for (size_t n = 0; n < chunks - shift_chunks; n++) {
409 result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
410 carry = (shift_bits == 0) ? 0
411 : data[n] >> (chunk::bits - shift_bits);
412 }
413 return result;
414 }
415
416 template<size_t AmountBits, bool Signed = false>
417 value<Bits> shr(const value<AmountBits> &amount) const {
418 // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
419 static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported");
420 // Detect shifts that are definitely larger than Bits early.
421 for (size_t n = 1; n < amount.chunks; n++)
422 if (amount.data[n] != 0)
423 return {};
424 // Past this point we can use the least significant chunk as the shift size.
425 size_t shift_chunks = amount.data[0] / chunk::bits;
426 size_t shift_bits = amount.data[0] % chunk::bits;
427 if (shift_chunks >= chunks)
428 return {};
429 value<Bits> result;
430 chunk::type carry = 0;
431 for (size_t n = 0; n < chunks - shift_chunks; n++) {
432 result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits);
433 carry = (shift_bits == 0) ? 0
434 : data[chunks - 1 - n] << (chunk::bits - shift_bits);
435 }
436 if (Signed && is_neg()) {
437 size_t top_chunk_idx = (Bits - shift_bits) / chunk::bits;
438 size_t top_chunk_bits = (Bits - shift_bits) % chunk::bits;
439 for (size_t n = top_chunk_idx + 1; n < chunks; n++)
440 result.data[n] = chunk::mask;
441 if (shift_bits != 0)
442 result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits;
443 }
444 return result;
445 }
446
447 template<size_t AmountBits>
448 value<Bits> sshr(const value<AmountBits> &amount) const {
449 return shr<AmountBits, /*Signed=*/true>(amount);
450 }
451
452 size_t ctpop() const {
453 size_t count = 0;
454 for (size_t n = 0; n < chunks; n++) {
455 // This loop implements the population count idiom as recognized by LLVM and GCC.
456 for (chunk::type x = data[n]; x != 0; count++)
457 x = x & (x - 1);
458 }
459 return count;
460 }
461
462 size_t ctlz() const {
463 size_t count = 0;
464 for (size_t n = 0; n < chunks; n++) {
465 chunk::type x = data[chunks - 1 - n];
466 if (x == 0) {
467 count += (n == 0 ? Bits % chunk::bits : chunk::bits);
468 } else {
469 // This loop implements the find first set idiom as recognized by LLVM.
470 for (; x != 0; count++)
471 x >>= 1;
472 }
473 }
474 return count;
475 }
476
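// Carry-propagating add/subtract core: computes data[] + (Invert ? ~other.data[] : other.data[]) + CarryIn
// one chunk at a time and returns the result together with the final carry-out. add(), sub(), ucmp()
// and scmp() below are all thin wrappers around this helper.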
477 template<bool Invert, bool CarryIn>
478 std::pair<value<Bits>, bool /*CarryOut*/> alu(const value<Bits> &other) const {
479 value<Bits> result;
480 bool carry = CarryIn;
481 for (size_t n = 0; n < result.chunks; n++) {
482 result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry;
483 if (result.chunks - 1 == n)
484 result.data[result.chunks - 1] &= result.msb_mask;
485 carry = (result.data[n] < data[n]) ||
486 (result.data[n] == data[n] && carry);
487 }
488 return {result, carry};
489 }
490
491 value<Bits> add(const value<Bits> &other) const {
492 return alu</*Invert=*/false, /*CarryIn=*/false>(other).first;
493 }
494
495 value<Bits> sub(const value<Bits> &other) const {
496 return alu</*Invert=*/true, /*CarryIn=*/true>(other).first;
497 }
498
499 value<Bits> neg() const {
500 return value<Bits> { 0u }.sub(*this);
501 }
502
503 bool ucmp(const value<Bits> &other) const {
504 bool carry;
505 std::tie(std::ignore, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
506 return !carry; // a.ucmp(b) ≡ a u< b
507 }
508
509 bool scmp(const value<Bits> &other) const {
510 value<Bits> result;
511 bool carry;
512 std::tie(result, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
513 bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
514 return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
515 }
516
517 template<size_t ResultBits>
518 value<ResultBits> mul(const value<Bits> &other) const {
519 value<ResultBits> result;
520 wide_chunk_t wide_result[result.chunks + 1] = {};
521 for (size_t n = 0; n < chunks; n++) {
522 for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
523 wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
524 wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
525 wide_result[n + m] &= chunk::mask;
526 }
527 }
528 for (size_t n = 0; n < result.chunks; n++) {
529 result.data[n] = wide_result[n];
530 }
531 result.data[result.chunks - 1] &= result.msb_mask;
532 return result;
533 }
534 };
535
536 // Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
537 template<class T, size_t Stop, size_t Start>
538 struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> {
539 static_assert(Stop >= Start, "slice_expr() may not reverse bit order");
540 static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds");
541 static constexpr size_t bits = Stop - Start + 1;
542
543 T &expr;
544
545 slice_expr(T &expr) : expr(expr) {}
546 slice_expr(const slice_expr<T, Stop, Start> &) = delete;
547
548 CXXRTL_ALWAYS_INLINE
549 operator value<bits>() const {
550 return static_cast<const value<T::bits> &>(expr)
551 .template rtrunc<T::bits - Start>()
552 .template trunc<bits>();
553 }
554
555 CXXRTL_ALWAYS_INLINE
556 slice_expr<T, Stop, Start> &operator=(const value<bits> &rhs) {
557 // Generic partial assignment implemented using a read-modify-write operation on the sliced expression.
558 expr = static_cast<const value<T::bits> &>(expr)
559 .template blit<Stop, Start>(rhs);
560 return *this;
561 }
562
563 // A helper that forces the cast to value<>, which allows deduction to work.
564 CXXRTL_ALWAYS_INLINE
565 value<bits> val() const {
566 return static_cast<const value<bits> &>(*this);
567 }
568 };
569
570 // Expression template for a concatenation, usable as lvalue or rvalue, and composable with other expression templates here.
571 template<class T, class U>
572 struct concat_expr : public expr_base<concat_expr<T, U>> {
573 static constexpr size_t bits = T::bits + U::bits;
574
575 T &ms_expr;
576 U &ls_expr;
577
578 concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {}
579 concat_expr(const concat_expr<T, U> &) = delete;
580
581 CXXRTL_ALWAYS_INLINE
582 operator value<bits>() const {
583 value<bits> ms_shifted = static_cast<const value<T::bits> &>(ms_expr)
584 .template rzext<bits>();
585 value<bits> ls_extended = static_cast<const value<U::bits> &>(ls_expr)
586 .template zext<bits>();
587 return ms_shifted.bit_or(ls_extended);
588 }
589
590 CXXRTL_ALWAYS_INLINE
591 concat_expr<T, U> &operator=(const value<bits> &rhs) {
592 ms_expr = rhs.template rtrunc<T::bits>();
593 ls_expr = rhs.template trunc<U::bits>();
594 return *this;
595 }
596
597 // A helper that forces the cast to value<>, which allows deduction to work.
598 CXXRTL_ALWAYS_INLINE
599 value<bits> val() const {
600 return static_cast<const value<bits> &>(*this);
601 }
602 };
603
604 // Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues.
605 //
606 // Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because
607 // they refer to temporaries that will, in general, only live until the end of the statement. For example, both of
608 // these snippets perform use-after-free:
609 //
610 // const auto &a = val.slice<7,0>().slice<1>();
611 // value<1> b = a;
612 //
613 // auto &&c = val.slice<7,0>().slice<1>();
614 // c = value<1>{1u};
615 //
616 // An easy way to write code using slices and concatenations safely is to follow two simple rules:
617 // * Never explicitly name any type except `value<W>` or `const value<W> &`.
618 // * Never use a `const auto &` or `auto &&` in any such expression.
619 // Then, any code that compiles will be well-defined.
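//
// A well-defined usage sketch following these rules (the widths and values are illustrative):
//   value<8> v;
//   v.slice<7,4>() = value<4>{0xau};      // read-modify-write through a slice
//   value<1> msb = v.slice<7>().val();    // .val() forces materialization into a value<>
//   value<16> w = v.concat(v).val();      // concatenation; the result is a plain value<16>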
620 template<class T>
621 struct expr_base {
622 template<size_t Stop, size_t Start = Stop>
623 CXXRTL_ALWAYS_INLINE
624 slice_expr<const T, Stop, Start> slice() const {
625 return {*static_cast<const T *>(this)};
626 }
627
628 template<size_t Stop, size_t Start = Stop>
629 CXXRTL_ALWAYS_INLINE
630 slice_expr<T, Stop, Start> slice() {
631 return {*static_cast<T *>(this)};
632 }
633
634 template<class U>
635 CXXRTL_ALWAYS_INLINE
636 concat_expr<const T, typename std::remove_reference<const U>::type> concat(const U &other) const {
637 return {*static_cast<const T *>(this), other};
638 }
639
640 template<class U>
641 CXXRTL_ALWAYS_INLINE
642 concat_expr<T, typename std::remove_reference<U>::type> concat(U &&other) {
643 return {*static_cast<T *>(this), other};
644 }
645 };
646
647 template<size_t Bits>
648 std::ostream &operator<<(std::ostream &os, const value<Bits> &val) {
649 auto old_flags = os.flags(std::ios::right);
650 auto old_width = os.width(0);
651 auto old_fill = os.fill('0');
652 os << val.bits << '\'' << std::hex;
653 for (size_t n = val.chunks - 1; n != (size_t)-1; n--) {
654 if (n == val.chunks - 1 && Bits % value<Bits>::chunk::bits != 0)
655 os.width((Bits % value<Bits>::chunk::bits + 3) / 4);
656 else
657 os.width((value<Bits>::chunk::bits + 3) / 4);
658 os << val.data[n];
659 }
660 os.fill(old_fill);
661 os.width(old_width);
662 os.flags(old_flags);
663 return os;
664 }
665
666 template<size_t Bits>
667 struct wire {
668 static constexpr size_t bits = Bits;
669
670 value<Bits> curr;
671 value<Bits> next;
672
673 wire() = default;
674 explicit constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
675 template<typename... Init>
676 explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {}
677
678 // Copying and copy-assigning values is natural. If, however, a value is replaced with a wire,
679 // e.g. because a module is built with a different optimization level, then existing code could
680 // unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure
681 // this doesn't happen, prohibit copying and copy-assigning wires.
682 wire(const wire<Bits> &) = delete;
683 wire<Bits> &operator=(const wire<Bits> &) = delete;
684
685 wire(wire<Bits> &&) = default;
686 wire<Bits> &operator=(wire<Bits> &&) = default;
687
688 template<class IntegerT>
689 CXXRTL_ALWAYS_INLINE
690 IntegerT get() const {
691 return curr.template get<IntegerT>();
692 }
693
694 template<class IntegerT>
695 CXXRTL_ALWAYS_INLINE
696 void set(IntegerT other) {
697 next.template set<IntegerT>(other);
698 }
699
700 bool commit() {
701 if (curr != next) {
702 curr = next;
703 return true;
704 }
705 return false;
706 }
707 };
708
709 template<size_t Bits>
710 std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
711 os << val.curr;
712 return os;
713 }
714
715 template<size_t Width>
716 struct memory {
717 std::vector<value<Width>> data;
718
719 size_t depth() const {
720 return data.size();
721 }
722
723 memory() = delete;
724 explicit memory(size_t depth) : data(depth) {}
725
726 memory(const memory<Width> &) = delete;
727 memory<Width> &operator=(const memory<Width> &) = delete;
728
729 memory(memory<Width> &&) = default;
730 memory<Width> &operator=(memory<Width> &&) = default;
731
732 // The only way to get the compiler to put the initializer in .rodata and not copy it onto the stack is to stuff it
733 // into a plain array. You'd think a std::initializer_list would work here, but it doesn't, because you can't
734 // construct an initializer_list in a constexpr context, so if you try to do that, the whole thing is
735 // first copied onto the stack (probably overflowing it) and then again into `data`.
736 template<size_t Size>
737 struct init {
738 size_t offset;
739 value<Width> data[Size];
740 };
741
742 template<size_t... InitSize>
743 explicit memory(size_t depth, const init<InitSize> &...init) : data(depth) {
744 data.resize(depth);
745 // This utterly reprehensible construct is the most reasonable way to apply a function to every element
746 // of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list.
747 auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
748 (void)_;
749 }
750
751 // An operator for direct memory reads. May be used at any time during the simulation.
752 const value<Width> &operator [](size_t index) const {
753 assert(index < data.size());
754 return data[index];
755 }
756
757 // An operator for direct memory writes. May only be used before the simulation is started. If used
758 // after the simulation is started, the design may malfunction.
759 value<Width> &operator [](size_t index) {
760 assert(index < data.size());
761 return data[index];
762 }
763
764 // A simple way to make a writable memory would be to use an array of wires instead of an array of values.
765 // However, there are two significant downsides to this approach: first, it has large overhead (2× space
766 // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
767 // priorities. Although in principle write ports could be ordered or conditionally enabled in generated
768 // code based on their priorities and selected addresses, the feedback arc set problem is computationally
769 // expensive, and heuristic-based algorithms are not easily modified to guarantee (rather than prefer)
770 // a particular write port evaluation order.
771 //
772 // The approach used here instead is to queue writes into a buffer during the eval phase, then perform
773 // the writes during the commit phase in priority order. This approach has low overhead, with both space
774 // and time proportional to the number of write ports. Because virtually every memory in a practical design
775 // has at most two write ports, linear search is used on every write, as it is the fastest and simplest approach.
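//
// For example, a generated write port might queue up a masked write roughly like this (the names
// are illustrative):
//   mem.update(addr, wr_data, wr_mask, /*priority=*/0);
// and the queued writes are then applied, in priority order, by the next call to commit().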
776 struct write {
777 size_t index;
778 value<Width> val;
779 value<Width> mask;
780 int priority;
781 };
782 std::vector<write> write_queue;
783
784 void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
785 assert(index < data.size());
786 // Queue up the write while keeping the queue sorted by priority.
787 write_queue.insert(
788 std::upper_bound(write_queue.begin(), write_queue.end(), priority,
789 [](const int a, const write& b) { return a < b.priority; }),
790 write { index, val, mask, priority });
791 }
792
793 bool commit() {
794 bool changed = false;
795 for (const write &entry : write_queue) {
796 value<Width> elem = data[entry.index];
797 elem = elem.update(entry.val, entry.mask);
798 changed |= (data[entry.index] != elem);
799 data[entry.index] = elem;
800 }
801 write_queue.clear();
802 return changed;
803 }
804 };
805
806 struct metadata {
807 const enum {
808 MISSING = 0,
809 UINT = 1,
810 SINT = 2,
811 STRING = 3,
812 DOUBLE = 4,
813 } value_type;
814
815 // In debug mode, using the wrong .as_*() function will assert.
816 // In release mode, using the wrong .as_*() function will safely return a default value.
817 const unsigned uint_value = 0;
818 const signed sint_value = 0;
819 const std::string string_value = "";
820 const double double_value = 0.0;
821
822 metadata() : value_type(MISSING) {}
823 metadata(unsigned value) : value_type(UINT), uint_value(value) {}
824 metadata(signed value) : value_type(SINT), sint_value(value) {}
825 metadata(const std::string &value) : value_type(STRING), string_value(value) {}
826 metadata(const char *value) : value_type(STRING), string_value(value) {}
827 metadata(double value) : value_type(DOUBLE), double_value(value) {}
828
829 metadata(const metadata &) = default;
830 metadata &operator=(const metadata &) = delete;
831
832 unsigned as_uint() const {
833 assert(value_type == UINT);
834 return uint_value;
835 }
836
837 signed as_sint() const {
838 assert(value_type == SINT);
839 return sint_value;
840 }
841
842 const std::string &as_string() const {
843 assert(value_type == STRING);
844 return string_value;
845 }
846
847 double as_double() const {
848 assert(value_type == DOUBLE);
849 return double_value;
850 }
851 };
852
853 typedef std::map<std::string, metadata> metadata_map;
854
855 // Tag class to disambiguate values/wires and their aliases.
856 struct debug_alias {};
857
858 // Tag declaration to disambiguate values and debug outlines.
859 using debug_outline = ::_cxxrtl_outline;
860
861 // This structure is intended for consumption via foreign function interfaces, like Python's ctypes.
862 // Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++.
863 //
864 // To avoid violating strict aliasing rules, this structure has to be a subclass of the one used
865 // in the C API, or it would not be possible to cast between the pointers to these.
866 struct debug_item : ::cxxrtl_object {
867 // Object types.
868 enum : uint32_t {
869 VALUE = CXXRTL_VALUE,
870 WIRE = CXXRTL_WIRE,
871 MEMORY = CXXRTL_MEMORY,
872 ALIAS = CXXRTL_ALIAS,
873 OUTLINE = CXXRTL_OUTLINE,
874 };
875
876 // Object flags.
877 enum : uint32_t {
878 INPUT = CXXRTL_INPUT,
879 OUTPUT = CXXRTL_OUTPUT,
880 INOUT = CXXRTL_INOUT,
881 DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC,
882 DRIVEN_COMB = CXXRTL_DRIVEN_COMB,
883 UNDRIVEN = CXXRTL_UNDRIVEN,
884 };
885
886 debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {}
887
888 template<size_t Bits>
889 debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
890 static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
891 "value<Bits> is not compatible with C layout");
892 type = VALUE;
893 flags = flags_;
894 width = Bits;
895 lsb_at = lsb_offset;
896 depth = 1;
897 zero_at = 0;
898 curr = item.data;
899 next = item.data;
900 outline = nullptr;
901 }
902
903 template<size_t Bits>
904 debug_item(const value<Bits> &item, size_t lsb_offset = 0) {
905 static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
906 "value<Bits> is not compatible with C layout");
907 type = VALUE;
908 flags = DRIVEN_COMB;
909 width = Bits;
910 lsb_at = lsb_offset;
911 depth = 1;
912 zero_at = 0;
913 curr = const_cast<chunk_t*>(item.data);
914 next = nullptr;
915 outline = nullptr;
916 }
917
918 template<size_t Bits>
919 debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
920 static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
921 sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
922 "wire<Bits> is not compatible with C layout");
923 type = WIRE;
924 flags = flags_;
925 width = Bits;
926 lsb_at = lsb_offset;
927 depth = 1;
928 zero_at = 0;
929 curr = item.curr.data;
930 next = item.next.data;
931 outline = nullptr;
932 }
933
934 template<size_t Width>
935 debug_item(memory<Width> &item, size_t zero_offset = 0) {
936 static_assert(sizeof(item.data[0]) == value<Width>::chunks * sizeof(chunk_t),
937 "memory<Width> is not compatible with C layout");
938 type = MEMORY;
939 flags = 0;
940 width = Width;
941 lsb_at = 0;
942 depth = item.data.size();
943 zero_at = zero_offset;
944 curr = item.data.empty() ? nullptr : item.data[0].data;
945 next = nullptr;
946 outline = nullptr;
947 }
948
949 template<size_t Bits>
950 debug_item(debug_alias, const value<Bits> &item, size_t lsb_offset = 0) {
951 static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
952 "value<Bits> is not compatible with C layout");
953 type = ALIAS;
954 flags = DRIVEN_COMB;
955 width = Bits;
956 lsb_at = lsb_offset;
957 depth = 1;
958 zero_at = 0;
959 curr = const_cast<chunk_t*>(item.data);
960 next = nullptr;
961 outline = nullptr;
962 }
963
964 template<size_t Bits>
965 debug_item(debug_alias, const wire<Bits> &item, size_t lsb_offset = 0) {
966 static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
967 sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
968 "wire<Bits> is not compatible with C layout");
969 type = ALIAS;
970 flags = DRIVEN_COMB;
971 width = Bits;
972 lsb_at = lsb_offset;
973 depth = 1;
974 zero_at = 0;
975 curr = const_cast<chunk_t*>(item.curr.data);
976 next = nullptr;
977 outline = nullptr;
978 }
979
980 template<size_t Bits>
981 debug_item(debug_outline &group, const value<Bits> &item, size_t lsb_offset = 0) {
982 static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
983 "value<Bits> is not compatible with C layout");
984 type = OUTLINE;
985 flags = DRIVEN_COMB;
986 width = Bits;
987 lsb_at = lsb_offset;
988 depth = 1;
989 zero_at = 0;
990 curr = const_cast<chunk_t*>(item.data);
991 next = nullptr;
992 outline = &group;
993 }
994 };
995 static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout");
996
997 struct debug_items {
998 std::map<std::string, std::vector<debug_item>> table;
999
1000 void add(const std::string &name, debug_item &&item) {
1001 std::vector<debug_item> &parts = table[name];
1002 parts.emplace_back(item);
1003 std::sort(parts.begin(), parts.end(),
1004 [](const debug_item &a, const debug_item &b) {
1005 return a.lsb_at < b.lsb_at;
1006 });
1007 }
1008
1009 size_t count(const std::string &name) const {
1010 if (table.count(name) == 0)
1011 return 0;
1012 return table.at(name).size();
1013 }
1014
1015 const std::vector<debug_item> &parts_at(const std::string &name) const {
1016 return table.at(name);
1017 }
1018
1019 const debug_item &at(const std::string &name) const {
1020 const std::vector<debug_item> &parts = table.at(name);
1021 assert(parts.size() == 1);
1022 return parts.at(0);
1023 }
1024
1025 const debug_item &operator [](const std::string &name) const {
1026 return at(name);
1027 }
1028 };
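
// A generated (or hand-written, for a black box) debug_info() implementation typically registers its
// items along these lines; the member names used here are purely illustrative:
//   void debug_info(debug_items &items, std::string path = "") override {
//     items.add(path + "counter", debug_item(counter));
//     items.add(path + "mem",     debug_item(mem));
//   }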
1029
1030 // Tag class to disambiguate module move constructor and module constructor that takes black boxes
1031 // out of another instance of the module.
1032 struct adopt {};
1033
1034 struct module {
1035 module() {}
1036 virtual ~module() {}
1037
1038 // Modules with black boxes cannot be copied. Although not all designs include black boxes,
1039 // delete the copy constructor and copy assignment operator to make sure that any downstream
1040 // code that manipulates modules doesn't accidentally depend on their availability.
1041 module(const module &) = delete;
1042 module &operator=(const module &) = delete;
1043
1044 module(module &&) = default;
1045 module &operator=(module &&) = default;
1046
1047 virtual void reset() = 0;
1048
1049 virtual bool eval() = 0;
1050 virtual bool commit() = 0;
1051
1052 size_t step() {
1053 size_t deltas = 0;
1054 bool converged = false;
1055 do {
1056 converged = eval();
1057 deltas++;
1058 } while (commit() && !converged);
1059 return deltas;
1060 }
1061
1062 virtual void debug_info(debug_items &items, std::string path = "") {
1063 (void)items, (void)path;
1064 }
1065 };
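
// A minimal top-level simulation loop might look like the following, assuming a design compiled with
// `write_cxxrtl` into a module `cxxrtl_design::p_top` with a 1-bit clock input `p_clk` (both names are
// illustrative and depend on the design):
//   cxxrtl_design::p_top top;
//   top.step(); // settle the initial state
//   for (int cycle = 0; cycle < 1000; cycle++) {
//     top.p_clk.set<bool>(false); top.step();
//     top.p_clk.set<bool>(true);  top.step();
//   }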
1066
1067 } // namespace cxxrtl
1068
1069 // Internal structures used to communicate with the implementation of the C interface.
1070
1071 typedef struct _cxxrtl_toplevel {
1072 std::unique_ptr<cxxrtl::module> module;
1073 } *cxxrtl_toplevel;
1074
1075 typedef struct _cxxrtl_outline {
1076 std::function<void()> eval;
1077 } *cxxrtl_outline;
1078
1079 // Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic
1080 // and independent of Yosys implementation details.
1081 //
1082 // The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these
1083 // functions. All of Yosys's arithmetic and logical cells perform sign or zero extension on their operands,
1084 // whereas the basic operations on arbitrary-width values require operands of the same width. These functions
1085 // bridge the gap by performing the necessary casts. They are named similarly to `cell_A[B]`, where A and B are `u`
1086 // if the corresponding operand is unsigned, and `s` if it is signed.
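//
// For example, a `$ge` cell comparing two signed operands would be translated into a call along the lines
// of `ge_ss<Y_WIDTH>(a, b)`; the exact form of the emitted code is determined by the `write_cxxrtl` pass.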
1087 namespace cxxrtl_yosys {
1088
1089 using namespace cxxrtl;
1090
1091 // std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own.
1092 template<class T>
1093 CXXRTL_ALWAYS_INLINE
1094 constexpr T max(const T &a, const T &b) {
1095 return a > b ? a : b;
1096 }
1097
1098 // Logic operations
1099 template<size_t BitsY, size_t BitsA>
1100 CXXRTL_ALWAYS_INLINE
1101 value<BitsY> logic_not(const value<BitsA> &a) {
1102 return value<BitsY> { a ? 0u : 1u };
1103 }
1104
1105 template<size_t BitsY, size_t BitsA, size_t BitsB>
1106 CXXRTL_ALWAYS_INLINE
1107 value<BitsY> logic_and(const value<BitsA> &a, const value<BitsB> &b) {
1108 return value<BitsY> { (bool(a) && bool(b)) ? 1u : 0u };
1109 }
1110
1111 template<size_t BitsY, size_t BitsA, size_t BitsB>
1112 CXXRTL_ALWAYS_INLINE
1113 value<BitsY> logic_or(const value<BitsA> &a, const value<BitsB> &b) {
1114 return value<BitsY> { (bool(a) || bool(b)) ? 1u : 0u };
1115 }
1116
1117 // Reduction operations
1118 template<size_t BitsY, size_t BitsA>
1119 CXXRTL_ALWAYS_INLINE
1120 value<BitsY> reduce_and(const value<BitsA> &a) {
1121 return value<BitsY> { a.bit_not().is_zero() ? 1u : 0u };
1122 }
1123
1124 template<size_t BitsY, size_t BitsA>
1125 CXXRTL_ALWAYS_INLINE
1126 value<BitsY> reduce_or(const value<BitsA> &a) {
1127 return value<BitsY> { a ? 1u : 0u };
1128 }
1129
1130 template<size_t BitsY, size_t BitsA>
1131 CXXRTL_ALWAYS_INLINE
1132 value<BitsY> reduce_xor(const value<BitsA> &a) {
1133 return value<BitsY> { (a.ctpop() % 2) ? 1u : 0u };
1134 }
1135
1136 template<size_t BitsY, size_t BitsA>
1137 CXXRTL_ALWAYS_INLINE
1138 value<BitsY> reduce_xnor(const value<BitsA> &a) {
1139 return value<BitsY> { (a.ctpop() % 2) ? 0u : 1u };
1140 }
1141
1142 template<size_t BitsY, size_t BitsA>
1143 CXXRTL_ALWAYS_INLINE
1144 value<BitsY> reduce_bool(const value<BitsA> &a) {
1145 return value<BitsY> { a ? 1u : 0u };
1146 }
1147
1148 // Bitwise operations
1149 template<size_t BitsY, size_t BitsA>
1150 CXXRTL_ALWAYS_INLINE
1151 value<BitsY> not_u(const value<BitsA> &a) {
1152 return a.template zcast<BitsY>().bit_not();
1153 }
1154
1155 template<size_t BitsY, size_t BitsA>
1156 CXXRTL_ALWAYS_INLINE
1157 value<BitsY> not_s(const value<BitsA> &a) {
1158 return a.template scast<BitsY>().bit_not();
1159 }
1160
1161 template<size_t BitsY, size_t BitsA, size_t BitsB>
1162 CXXRTL_ALWAYS_INLINE
1163 value<BitsY> and_uu(const value<BitsA> &a, const value<BitsB> &b) {
1164 return a.template zcast<BitsY>().bit_and(b.template zcast<BitsY>());
1165 }
1166
1167 template<size_t BitsY, size_t BitsA, size_t BitsB>
1168 CXXRTL_ALWAYS_INLINE
1169 value<BitsY> and_ss(const value<BitsA> &a, const value<BitsB> &b) {
1170 return a.template scast<BitsY>().bit_and(b.template scast<BitsY>());
1171 }
1172
1173 template<size_t BitsY, size_t BitsA, size_t BitsB>
1174 CXXRTL_ALWAYS_INLINE
1175 value<BitsY> or_uu(const value<BitsA> &a, const value<BitsB> &b) {
1176 return a.template zcast<BitsY>().bit_or(b.template zcast<BitsY>());
1177 }
1178
1179 template<size_t BitsY, size_t BitsA, size_t BitsB>
1180 CXXRTL_ALWAYS_INLINE
1181 value<BitsY> or_ss(const value<BitsA> &a, const value<BitsB> &b) {
1182 return a.template scast<BitsY>().bit_or(b.template scast<BitsY>());
1183 }
1184
1185 template<size_t BitsY, size_t BitsA, size_t BitsB>
1186 CXXRTL_ALWAYS_INLINE
1187 value<BitsY> xor_uu(const value<BitsA> &a, const value<BitsB> &b) {
1188 return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>());
1189 }
1190
1191 template<size_t BitsY, size_t BitsA, size_t BitsB>
1192 CXXRTL_ALWAYS_INLINE
1193 value<BitsY> xor_ss(const value<BitsA> &a, const value<BitsB> &b) {
1194 return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>());
1195 }
1196
1197 template<size_t BitsY, size_t BitsA, size_t BitsB>
1198 CXXRTL_ALWAYS_INLINE
1199 value<BitsY> xnor_uu(const value<BitsA> &a, const value<BitsB> &b) {
1200 return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()).bit_not();
1201 }
1202
1203 template<size_t BitsY, size_t BitsA, size_t BitsB>
1204 CXXRTL_ALWAYS_INLINE
1205 value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
1206 return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()).bit_not();
1207 }
1208
1209 template<size_t BitsY, size_t BitsA, size_t BitsB>
1210 CXXRTL_ALWAYS_INLINE
1211 value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
1212 return a.template zcast<BitsY>().template shl(b);
1213 }
1214
1215 template<size_t BitsY, size_t BitsA, size_t BitsB>
1216 CXXRTL_ALWAYS_INLINE
1217 value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
1218 return a.template scast<BitsY>().template shl(b);
1219 }
1220
1221 template<size_t BitsY, size_t BitsA, size_t BitsB>
1222 CXXRTL_ALWAYS_INLINE
1223 value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
1224 return a.template zcast<BitsY>().template shl(b);
1225 }
1226
1227 template<size_t BitsY, size_t BitsA, size_t BitsB>
1228 CXXRTL_ALWAYS_INLINE
1229 value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
1230 return a.template scast<BitsY>().template shl(b);
1231 }
1232
1233 template<size_t BitsY, size_t BitsA, size_t BitsB>
1234 CXXRTL_ALWAYS_INLINE
1235 value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
1236 return a.template shr(b).template zcast<BitsY>();
1237 }
1238
1239 template<size_t BitsY, size_t BitsA, size_t BitsB>
1240 CXXRTL_ALWAYS_INLINE
1241 value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
1242 return a.template shr(b).template scast<BitsY>();
1243 }
1244
1245 template<size_t BitsY, size_t BitsA, size_t BitsB>
1246 CXXRTL_ALWAYS_INLINE
1247 value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
1248 return a.template shr(b).template zcast<BitsY>();
1249 }
1250
1251 template<size_t BitsY, size_t BitsA, size_t BitsB>
1252 CXXRTL_ALWAYS_INLINE
1253 value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
1254 return a.template sshr(b).template scast<BitsY>();
1255 }
1256
1257 template<size_t BitsY, size_t BitsA, size_t BitsB>
1258 CXXRTL_ALWAYS_INLINE
1259 value<BitsY> shift_uu(const value<BitsA> &a, const value<BitsB> &b) {
1260 return shr_uu<BitsY>(a, b);
1261 }
1262
1263 template<size_t BitsY, size_t BitsA, size_t BitsB>
1264 CXXRTL_ALWAYS_INLINE
1265 value<BitsY> shift_su(const value<BitsA> &a, const value<BitsB> &b) {
1266 return shr_su<BitsY>(a, b);
1267 }
1268
1269 template<size_t BitsY, size_t BitsA, size_t BitsB>
1270 CXXRTL_ALWAYS_INLINE
1271 value<BitsY> shift_us(const value<BitsA> &a, const value<BitsB> &b) {
1272 return b.is_neg() ? shl_uu<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_uu<BitsY>(a, b);
1273 }
1274
1275 template<size_t BitsY, size_t BitsA, size_t BitsB>
1276 CXXRTL_ALWAYS_INLINE
1277 value<BitsY> shift_ss(const value<BitsA> &a, const value<BitsB> &b) {
1278 return b.is_neg() ? shl_su<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_su<BitsY>(a, b);
1279 }
1280
1281 template<size_t BitsY, size_t BitsA, size_t BitsB>
1282 CXXRTL_ALWAYS_INLINE
1283 value<BitsY> shiftx_uu(const value<BitsA> &a, const value<BitsB> &b) {
1284 return shift_uu<BitsY>(a, b);
1285 }
1286
1287 template<size_t BitsY, size_t BitsA, size_t BitsB>
1288 CXXRTL_ALWAYS_INLINE
1289 value<BitsY> shiftx_su(const value<BitsA> &a, const value<BitsB> &b) {
1290 return shift_su<BitsY>(a, b);
1291 }
1292
1293 template<size_t BitsY, size_t BitsA, size_t BitsB>
1294 CXXRTL_ALWAYS_INLINE
1295 value<BitsY> shiftx_us(const value<BitsA> &a, const value<BitsB> &b) {
1296 return shift_us<BitsY>(a, b);
1297 }
1298
1299 template<size_t BitsY, size_t BitsA, size_t BitsB>
1300 CXXRTL_ALWAYS_INLINE
1301 value<BitsY> shiftx_ss(const value<BitsA> &a, const value<BitsB> &b) {
1302 return shift_ss<BitsY>(a, b);
1303 }
1304
1305 // Comparison operations
1306 template<size_t BitsY, size_t BitsA, size_t BitsB>
1307 CXXRTL_ALWAYS_INLINE
1308 value<BitsY> eq_uu(const value<BitsA> &a, const value<BitsB> &b) {
1309 constexpr size_t BitsExt = max(BitsA, BitsB);
1310 return value<BitsY>{ a.template zext<BitsExt>() == b.template zext<BitsExt>() ? 1u : 0u };
1311 }
1312
1313 template<size_t BitsY, size_t BitsA, size_t BitsB>
1314 CXXRTL_ALWAYS_INLINE
1315 value<BitsY> eq_ss(const value<BitsA> &a, const value<BitsB> &b) {
1316 constexpr size_t BitsExt = max(BitsA, BitsB);
1317 return value<BitsY>{ a.template sext<BitsExt>() == b.template sext<BitsExt>() ? 1u : 0u };
1318 }
1319
1320 template<size_t BitsY, size_t BitsA, size_t BitsB>
1321 CXXRTL_ALWAYS_INLINE
1322 value<BitsY> ne_uu(const value<BitsA> &a, const value<BitsB> &b) {
1323 constexpr size_t BitsExt = max(BitsA, BitsB);
1324 return value<BitsY>{ a.template zext<BitsExt>() != b.template zext<BitsExt>() ? 1u : 0u };
1325 }
1326
1327 template<size_t BitsY, size_t BitsA, size_t BitsB>
1328 CXXRTL_ALWAYS_INLINE
1329 value<BitsY> ne_ss(const value<BitsA> &a, const value<BitsB> &b) {
1330 constexpr size_t BitsExt = max(BitsA, BitsB);
1331 return value<BitsY>{ a.template sext<BitsExt>() != b.template sext<BitsExt>() ? 1u : 0u };
1332 }
1333
1334 template<size_t BitsY, size_t BitsA, size_t BitsB>
1335 CXXRTL_ALWAYS_INLINE
1336 value<BitsY> eqx_uu(const value<BitsA> &a, const value<BitsB> &b) {
1337 return eq_uu<BitsY>(a, b);
1338 }
1339
1340 template<size_t BitsY, size_t BitsA, size_t BitsB>
1341 CXXRTL_ALWAYS_INLINE
1342 value<BitsY> eqx_ss(const value<BitsA> &a, const value<BitsB> &b) {
1343 return eq_ss<BitsY>(a, b);
1344 }
1345
1346 template<size_t BitsY, size_t BitsA, size_t BitsB>
1347 CXXRTL_ALWAYS_INLINE
1348 value<BitsY> nex_uu(const value<BitsA> &a, const value<BitsB> &b) {
1349 return ne_uu<BitsY>(a, b);
1350 }
1351
1352 template<size_t BitsY, size_t BitsA, size_t BitsB>
1353 CXXRTL_ALWAYS_INLINE
1354 value<BitsY> nex_ss(const value<BitsA> &a, const value<BitsB> &b) {
1355 return ne_ss<BitsY>(a, b);
1356 }
1357
1358 template<size_t BitsY, size_t BitsA, size_t BitsB>
1359 CXXRTL_ALWAYS_INLINE
1360 value<BitsY> gt_uu(const value<BitsA> &a, const value<BitsB> &b) {
1361 constexpr size_t BitsExt = max(BitsA, BitsB);
1362 return value<BitsY> { b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
1363 }
1364
1365 template<size_t BitsY, size_t BitsA, size_t BitsB>
1366 CXXRTL_ALWAYS_INLINE
1367 value<BitsY> gt_ss(const value<BitsA> &a, const value<BitsB> &b) {
1368 constexpr size_t BitsExt = max(BitsA, BitsB);
1369 return value<BitsY> { b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
1370 }
1371
1372 template<size_t BitsY, size_t BitsA, size_t BitsB>
1373 CXXRTL_ALWAYS_INLINE
1374 value<BitsY> ge_uu(const value<BitsA> &a, const value<BitsB> &b) {
1375 constexpr size_t BitsExt = max(BitsA, BitsB);
1376 return value<BitsY> { !a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
1377 }
1378
1379 template<size_t BitsY, size_t BitsA, size_t BitsB>
1380 CXXRTL_ALWAYS_INLINE
1381 value<BitsY> ge_ss(const value<BitsA> &a, const value<BitsB> &b) {
1382 constexpr size_t BitsExt = max(BitsA, BitsB);
1383 return value<BitsY> { !a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
1384 }
1385
1386 template<size_t BitsY, size_t BitsA, size_t BitsB>
1387 CXXRTL_ALWAYS_INLINE
1388 value<BitsY> lt_uu(const value<BitsA> &a, const value<BitsB> &b) {
1389 constexpr size_t BitsExt = max(BitsA, BitsB);
1390 return value<BitsY> { a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
1391 }
1392
1393 template<size_t BitsY, size_t BitsA, size_t BitsB>
1394 CXXRTL_ALWAYS_INLINE
1395 value<BitsY> lt_ss(const value<BitsA> &a, const value<BitsB> &b) {
1396 constexpr size_t BitsExt = max(BitsA, BitsB);
1397 return value<BitsY> { a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
1398 }
1399
1400 template<size_t BitsY, size_t BitsA, size_t BitsB>
1401 CXXRTL_ALWAYS_INLINE
1402 value<BitsY> le_uu(const value<BitsA> &a, const value<BitsB> &b) {
1403 constexpr size_t BitsExt = max(BitsA, BitsB);
1404 return value<BitsY> { !b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
1405 }
1406
1407 template<size_t BitsY, size_t BitsA, size_t BitsB>
1408 CXXRTL_ALWAYS_INLINE
1409 value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) {
1410 constexpr size_t BitsExt = max(BitsA, BitsB);
1411 return value<BitsY> { !b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
1412 }
1413
1414 // Arithmetic operations
1415 template<size_t BitsY, size_t BitsA>
1416 CXXRTL_ALWAYS_INLINE
1417 value<BitsY> pos_u(const value<BitsA> &a) {
1418 return a.template zcast<BitsY>();
1419 }
1420
1421 template<size_t BitsY, size_t BitsA>
1422 CXXRTL_ALWAYS_INLINE
1423 value<BitsY> pos_s(const value<BitsA> &a) {
1424 return a.template scast<BitsY>();
1425 }
1426
1427 template<size_t BitsY, size_t BitsA>
1428 CXXRTL_ALWAYS_INLINE
1429 value<BitsY> neg_u(const value<BitsA> &a) {
1430 return a.template zcast<BitsY>().neg();
1431 }
1432
1433 template<size_t BitsY, size_t BitsA>
1434 CXXRTL_ALWAYS_INLINE
1435 value<BitsY> neg_s(const value<BitsA> &a) {
1436 return a.template scast<BitsY>().neg();
1437 }
1438
1439 template<size_t BitsY, size_t BitsA, size_t BitsB>
1440 CXXRTL_ALWAYS_INLINE
1441 value<BitsY> add_uu(const value<BitsA> &a, const value<BitsB> &b) {
1442 return a.template zcast<BitsY>().add(b.template zcast<BitsY>());
1443 }
1444
1445 template<size_t BitsY, size_t BitsA, size_t BitsB>
1446 CXXRTL_ALWAYS_INLINE
1447 value<BitsY> add_ss(const value<BitsA> &a, const value<BitsB> &b) {
1448 return a.template scast<BitsY>().add(b.template scast<BitsY>());
1449 }
1450
1451 template<size_t BitsY, size_t BitsA, size_t BitsB>
1452 CXXRTL_ALWAYS_INLINE
1453 value<BitsY> sub_uu(const value<BitsA> &a, const value<BitsB> &b) {
1454 return a.template zcast<BitsY>().sub(b.template zcast<BitsY>());
1455 }
1456
1457 template<size_t BitsY, size_t BitsA, size_t BitsB>
1458 CXXRTL_ALWAYS_INLINE
1459 value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
1460 return a.template scast<BitsY>().sub(b.template scast<BitsY>());
1461 }
1462
1463 template<size_t BitsY, size_t BitsA, size_t BitsB>
1464 CXXRTL_ALWAYS_INLINE
1465 value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
1466 constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
1467 return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
1468 }
1469
1470 template<size_t BitsY, size_t BitsA, size_t BitsB>
1471 CXXRTL_ALWAYS_INLINE
1472 value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
1473 return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
1474 }
1475
1476 template<size_t BitsY, size_t BitsA, size_t BitsB>
1477 CXXRTL_ALWAYS_INLINE
1478 std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const value<BitsB> &b) {
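// Shift-and-subtract long division: align the divisor with the dividend using ctlz(), then repeatedly
// compare, subtract, and shift, producing one quotient bit per iteration.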
1479 constexpr size_t Bits = max(BitsY, max(BitsA, BitsB));
1480 value<Bits> quotient;
1481 value<Bits> dividend = a.template zext<Bits>();
1482 value<Bits> divisor = b.template zext<Bits>();
1483 if (dividend.ucmp(divisor))
1484 return {/*quotient=*/value<BitsY> { 0u }, /*remainder=*/dividend.template trunc<BitsY>()};
1485 uint32_t divisor_shift = dividend.ctlz() - divisor.ctlz();
1486 divisor = divisor.shl(value<32> { divisor_shift });
1487 for (size_t step = 0; step <= divisor_shift; step++) {
1488 quotient = quotient.shl(value<1> { 1u });
1489 if (!dividend.ucmp(divisor)) {
1490 dividend = dividend.sub(divisor);
1491 quotient.set_bit(0, true);
1492 }
1493 divisor = divisor.shr(value<1> { 1u });
1494 }
1495 return {quotient.template trunc<BitsY>(), /*remainder=*/dividend.template trunc<BitsY>()};
1496 }
1497
1498 template<size_t BitsY, size_t BitsA, size_t BitsB>
1499 CXXRTL_ALWAYS_INLINE
1500 std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const value<BitsB> &b) {
1501 value<BitsA + 1> ua = a.template sext<BitsA + 1>();
1502 value<BitsB + 1> ub = b.template sext<BitsB + 1>();
1503 if (ua.is_neg()) ua = ua.neg();
1504 if (ub.is_neg()) ub = ub.neg();
1505 value<BitsY> y, r;
1506 std::tie(y, r) = divmod_uu<BitsY>(ua, ub);
1507 if (a.is_neg() != b.is_neg()) y = y.neg();
1508 if (a.is_neg()) r = r.neg();
1509 return {y, r};
1510 }
1511
1512 template<size_t BitsY, size_t BitsA, size_t BitsB>
1513 CXXRTL_ALWAYS_INLINE
1514 value<BitsY> div_uu(const value<BitsA> &a, const value<BitsB> &b) {
1515 return divmod_uu<BitsY>(a, b).first;
1516 }
1517
1518 template<size_t BitsY, size_t BitsA, size_t BitsB>
1519 CXXRTL_ALWAYS_INLINE
1520 value<BitsY> div_ss(const value<BitsA> &a, const value<BitsB> &b) {
1521 return divmod_ss<BitsY>(a, b).first;
1522 }
1523
1524 template<size_t BitsY, size_t BitsA, size_t BitsB>
1525 CXXRTL_ALWAYS_INLINE
1526 value<BitsY> mod_uu(const value<BitsA> &a, const value<BitsB> &b) {
1527 return divmod_uu<BitsY>(a, b).second;
1528 }
1529
1530 template<size_t BitsY, size_t BitsA, size_t BitsB>
1531 CXXRTL_ALWAYS_INLINE
1532 value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
1533 return divmod_ss<BitsY>(a, b).second;
1534 }
1535
1536 // Memory helper
1537 struct memory_index {
1538 bool valid;
1539 size_t index;
1540
1541 template<size_t BitsAddr>
1542 memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
1543 static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
1544 size_t offset_index = addr.data[0];
1545
1546 valid = (offset_index >= offset && offset_index < offset + depth);
1547 index = offset_index - offset;
1548 }
1549 };
1550
1551 } // namespace cxxrtl_yosys
1552
1553 #endif