/*
 * yosys -- Yosys Open SYnthesis Suite
 *
 * Copyright (C) 2019-2020 whitequark <whitequark@whitequark.org>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
// This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself.
//
// The CXXRTL support library implements compile time specialized arbitrary width arithmetic, as well as provides
// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass
// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler
// to unwrap the abstraction and generate efficient code.

#include <cstddef>
#include <cstdint>
#include <cassert>
#include <limits>
#include <type_traits>
#include <tuple>
#include <string>
#include <sstream>
#include <iostream>
#include <vector>
#include <map>
#include <algorithm>
#include <memory>
#include <functional>

#include <backends/cxxrtl/cxxrtl_capi.h>

#ifndef __has_attribute
#	define __has_attribute(x) 0
#endif

// CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
// It generates a lot of specialized template functions with relatively large bodies that, when inlined
// into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
// Because of this, most of the CXXRTL runtime must always be inlined for best performance.
#if __has_attribute(always_inline)
#define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
#else
#define CXXRTL_ALWAYS_INLINE inline
#endif

// Conversely, some functions in the generated code are extremely large yet very cold, with both of these
// properties being extreme enough to confuse C++ compilers into spending pathological amounts of time
// on a futile (the code becomes worse) attempt to optimize the least important parts of code.
#if __has_attribute(optnone)
#define CXXRTL_EXTREMELY_COLD __attribute__((__optnone__))
#elif __has_attribute(optimize)
#define CXXRTL_EXTREMELY_COLD __attribute__((__optimize__(0)))
#else
#define CXXRTL_EXTREMELY_COLD
#endif

// CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
// of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
// most result in undefined simulation results).
//
// Though by default CXXRTL_ASSERT() expands to assert(), it may be overridden e.g. when integrating
// the simulation into another process that should survive violating RTL contracts.
#ifndef CXXRTL_ASSERT
#ifndef CXXRTL_NDEBUG
#define CXXRTL_ASSERT(x) assert(x)
#else
#define CXXRTL_ASSERT(x)
#endif
#endif
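
// For example (an illustrative sketch, not part of this header), a harness that wants to log
// RTL contract violations instead of aborting could define, before including this file:
//
//   #define CXXRTL_ASSERT(x) do { if (!(x)) log_rtl_violation(#x); } while (0)
//
// where `log_rtl_violation` is a hypothetical user-provided function.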

namespace cxxrtl {

// All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. The chunk size
// is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving
// and introspecting the simulation in Python.
//
// It is practical to use chunk sizes between 32 bits and the platform register size, because when arithmetic on
// narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register.
// However, (a) most of our operations do not change those bits in the first place because of invariants that are
// invisible to the compiler, and (b) we often operate on non-power-of-2 values and have to clear the high bits anyway.
// Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
// clobbered results in simpler generated code.
typedef uint32_t chunk_t;
typedef uint64_t wide_chunk_t;

template<typename T>
struct chunk_traits {
    static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
                  "chunk type must be an unsigned integral type");
    using type = T;
    static constexpr size_t bits = std::numeric_limits<T>::digits;
    static constexpr T mask = std::numeric_limits<T>::max();
};

template<class T>
struct expr_base;

template<size_t Bits>
struct value : public expr_base<value<Bits>> {
    static constexpr size_t bits = Bits;

    using chunk = chunk_traits<chunk_t>;
    static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask
        : chunk::mask >> (chunk::bits - (Bits % chunk::bits));

    static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits;
    chunk::type data[chunks] = {};
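
    // For example (an illustrative note, not part of the interface): with 32-bit chunks, a
    // value<48> occupies two chunks, and its msb_mask keeps only the low 16 bits of the top chunk:
    //
    //   value<48> v;   // v.chunks == 2, v.msb_mask == 0xffff
    //
    // Bits 48..63 of the backing storage are invariantly zero.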

    template<typename... Init>
    explicit constexpr value(Init ...init) : data{init...} {}

    value(const value<Bits> &) = default;
    value<Bits> &operator=(const value<Bits> &) = default;

    value(value<Bits> &&) = default;
    value<Bits> &operator=(value<Bits> &&) = default;

    // A (no-op) helper that forces the cast to value<>.
    CXXRTL_ALWAYS_INLINE
    const value<Bits> &val() const {
        return *this;
    }

    std::string str() const {
        std::stringstream ss;
        ss << *this;
        return ss.str();
    }

    // Conversion operations.
    //
    // These functions ensure that a conversion is never out of range, and should always be used, if at all
    // possible, instead of direct manipulation of the `data` member. For very large types, .slice() and
    // .concat() can be used to split them into more manageable parts.
    template<class IntegerT>
    CXXRTL_ALWAYS_INLINE
    IntegerT get() const {
        static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
                      "get<T>() requires T to be an unsigned integral type");
        static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
                      "get<T>() requires T to be at least as wide as the value is");
        IntegerT result = 0;
        for (size_t n = 0; n < chunks; n++)
            result |= IntegerT(data[n]) << (n * chunk::bits);
        return result;
    }

    template<class IntegerT>
    CXXRTL_ALWAYS_INLINE
    void set(IntegerT other) {
        static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
                      "set<T>() requires T to be an unsigned integral type");
        static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
                      "set<T>() requires the value to be at least as wide as T is");
        for (size_t n = 0; n < chunks; n++)
            data[n] = (other >> (n * chunk::bits)) & chunk::mask;
    }
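
    // For example (illustrative only):
    //
    //   value<24> v;
    //   v.set<uint32_t>(0xabcdefu);
    //   assert(v.get<uint32_t>() == 0xabcdefu);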

    // Operations with compile-time parameters.
    //
    // These operations are used to implement slicing, concatenation, and blitting.
    // The trunc, zext and sext operations add or remove most significant bits (i.e. on the left);
    // the rtrunc and rzext operations add or remove least significant bits (i.e. on the right).
    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> trunc() const {
        static_assert(NewBits <= Bits, "trunc() may not increase width");
        value<NewBits> result;
        for (size_t n = 0; n < result.chunks; n++)
            result.data[n] = data[n];
        result.data[result.chunks - 1] &= result.msb_mask;
        return result;
    }

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> zext() const {
        static_assert(NewBits >= Bits, "zext() may not decrease width");
        value<NewBits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = data[n];
        return result;
    }

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> sext() const {
        static_assert(NewBits >= Bits, "sext() may not decrease width");
        value<NewBits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = data[n];
        if (is_neg()) {
            result.data[chunks - 1] |= ~msb_mask;
            for (size_t n = chunks; n < result.chunks; n++)
                result.data[n] = chunk::mask;
            result.data[result.chunks - 1] &= result.msb_mask;
        }
        return result;
    }
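
    // For example (illustrative only), for a 4-bit value with the MSB set:
    //
    //   value<4> x { 0xau };
    //   x.zext<8>();   // == 8'h0a
    //   x.sext<8>();   // == 8'hfa (the sign bit is replicated)
    //   x.rzext<8>();  // == 8'ha0 (zeros are added on the right; see below)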

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> rtrunc() const {
        static_assert(NewBits <= Bits, "rtrunc() may not increase width");
        value<NewBits> result;
        constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits;
        constexpr size_t shift_bits   = (Bits - NewBits) % chunk::bits;
        chunk::type carry = 0;
        if (shift_chunks + result.chunks < chunks) {
            carry = (shift_bits == 0) ? 0
                : data[shift_chunks + result.chunks] << (chunk::bits - shift_bits);
        }
        for (size_t n = result.chunks; n > 0; n--) {
            result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits);
            carry = (shift_bits == 0) ? 0
                : data[shift_chunks + n - 1] << (chunk::bits - shift_bits);
        }
        return result;
    }

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> rzext() const {
        static_assert(NewBits >= Bits, "rzext() may not decrease width");
        value<NewBits> result;
        constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits;
        constexpr size_t shift_bits   = (NewBits - Bits) % chunk::bits;
        chunk::type carry = 0;
        for (size_t n = 0; n < chunks; n++) {
            result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
            carry = (shift_bits == 0) ? 0
                : data[n] >> (chunk::bits - shift_bits);
        }
        if (shift_chunks + chunks < result.chunks)
            result.data[shift_chunks + chunks] = carry;
        return result;
    }

    // Bit blit operation, i.e. a partial read-modify-write.
    template<size_t Stop, size_t Start>
    CXXRTL_ALWAYS_INLINE
    value<Bits> blit(const value<Stop - Start + 1> &source) const {
        static_assert(Stop >= Start, "blit() may not reverse bit order");
        constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits));
        constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0
            : (chunk::mask << (Stop % chunk::bits + 1));
        value<Bits> masked = *this;
        if (Start / chunk::bits == Stop / chunk::bits) {
            masked.data[Start / chunk::bits] &= stop_mask | start_mask;
        } else {
            masked.data[Start / chunk::bits] &= start_mask;
            for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++)
                masked.data[n] = 0;
            masked.data[Stop / chunk::bits] &= stop_mask;
        }
        value<Bits> shifted = source
            .template rzext<Stop + 1>()
            .template zext<Bits>();
        return masked.bit_or(shifted);
    }
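
    // For example (illustrative only), replacing bits 5..2 of an 8-bit value with zeros:
    //
    //   value<8> v { 0xffu };
    //   v.blit<5, 2>(value<4> { 0x0u });   // == 8'hc3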

    // Helpers for selecting extending or truncating operation depending on whether the result is wider or narrower
    // than the operand. In C++17 these can be replaced with `if constexpr`.
    template<size_t NewBits, typename = void>
    struct zext_cast {
        CXXRTL_ALWAYS_INLINE
        value<NewBits> operator()(const value<Bits> &val) {
            return val.template zext<NewBits>();
        }
    };

    template<size_t NewBits>
    struct zext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
        CXXRTL_ALWAYS_INLINE
        value<NewBits> operator()(const value<Bits> &val) {
            return val.template trunc<NewBits>();
        }
    };

    template<size_t NewBits, typename = void>
    struct sext_cast {
        CXXRTL_ALWAYS_INLINE
        value<NewBits> operator()(const value<Bits> &val) {
            return val.template sext<NewBits>();
        }
    };

    template<size_t NewBits>
    struct sext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
        CXXRTL_ALWAYS_INLINE
        value<NewBits> operator()(const value<Bits> &val) {
            return val.template trunc<NewBits>();
        }
    };

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> zcast() const {
        return zext_cast<NewBits>()(*this);
    }

    template<size_t NewBits>
    CXXRTL_ALWAYS_INLINE
    value<NewBits> scast() const {
        return sext_cast<NewBits>()(*this);
    }
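
    // For example (illustrative only), zcast() extends or truncates as needed:
    //
    //   value<4> x { 0x9u };
    //   x.zcast<8>();   // == 8'h09 (widens via zext)
    //   x.zcast<2>();   // == 2'h1  (narrows via trunc)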

    // Bit replication is far more efficient than the equivalent concatenation.
    template<size_t Count>
    CXXRTL_ALWAYS_INLINE
    value<Bits * Count> repeat() const {
        static_assert(Bits == 1, "repeat() is implemented only for 1-bit values");
        return *this ? value<Bits * Count>().bit_not() : value<Bits * Count>();
    }
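
    // For example (illustrative only):
    //
    //   value<1> b { 1u };
    //   b.repeat<4>();   // == 4'hf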

    // Operations with run-time parameters (offsets, amounts, etc).
    //
    // These operations are used for computations.
    bool bit(size_t offset) const {
        return data[offset / chunk::bits] & (1 << (offset % chunk::bits));
    }

    void set_bit(size_t offset, bool value = true) {
        size_t offset_chunks = offset / chunk::bits;
        size_t offset_bits   = offset % chunk::bits;
        data[offset_chunks] &= ~(1 << offset_bits);
        data[offset_chunks] |= value ? 1 << offset_bits : 0;
    }
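
    // For example (illustrative only):
    //
    //   value<8> v;
    //   v.set_bit(3, true);   // v == 8'h08
    //   v.bit(3);             // == true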

    explicit operator bool() const {
        return !is_zero();
    }

    bool is_zero() const {
        for (size_t n = 0; n < chunks; n++)
            if (data[n] != 0)
                return false;
        return true;
    }

    bool is_neg() const {
        return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits));
    }

    bool operator ==(const value<Bits> &other) const {
        for (size_t n = 0; n < chunks; n++)
            if (data[n] != other.data[n])
                return false;
        return true;
    }

    bool operator !=(const value<Bits> &other) const {
        return !(*this == other);
    }

    value<Bits> bit_not() const {
        value<Bits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = ~data[n];
        result.data[chunks - 1] &= msb_mask;
        return result;
    }

    value<Bits> bit_and(const value<Bits> &other) const {
        value<Bits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = data[n] & other.data[n];
        return result;
    }

    value<Bits> bit_or(const value<Bits> &other) const {
        value<Bits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = data[n] | other.data[n];
        return result;
    }

    value<Bits> bit_xor(const value<Bits> &other) const {
        value<Bits> result;
        for (size_t n = 0; n < chunks; n++)
            result.data[n] = data[n] ^ other.data[n];
        return result;
    }

    value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
        return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
    }
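
    // For example (illustrative only), update() takes bits from `val` where `mask` is set:
    //
    //   value<8> v { 0xf0u };
    //   v.update(value<8> { 0x0fu }, value<8> { 0x3cu });   // == 8'hcc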

    template<size_t AmountBits>
    value<Bits> shl(const value<AmountBits> &amount) const {
        // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
        static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported");
        // Detect shifts definitely larger than Bits early.
        for (size_t n = 1; n < amount.chunks; n++)
            if (amount.data[n] != 0)
                return {};
        // Past this point we can use the least significant chunk as the shift size.
        size_t shift_chunks = amount.data[0] / chunk::bits;
        size_t shift_bits   = amount.data[0] % chunk::bits;
        if (shift_chunks >= chunks)
            return {};
        value<Bits> result;
        chunk::type carry = 0;
        for (size_t n = 0; n < chunks - shift_chunks; n++) {
            result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
            carry = (shift_bits == 0) ? 0
                : data[n] >> (chunk::bits - shift_bits);
        }
        result.data[result.chunks - 1] &= result.msb_mask;
        return result;
    }

    template<size_t AmountBits, bool Signed = false>
    value<Bits> shr(const value<AmountBits> &amount) const {
        // Ensure our early return is correct by prohibiting values larger than 4 Gbit.
        static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported");
        // Detect shifts definitely larger than Bits early.
        for (size_t n = 1; n < amount.chunks; n++)
            if (amount.data[n] != 0)
                return {};
        // Past this point we can use the least significant chunk as the shift size.
        size_t shift_chunks = amount.data[0] / chunk::bits;
        size_t shift_bits   = amount.data[0] % chunk::bits;
        if (shift_chunks >= chunks)
            return {};
        value<Bits> result;
        chunk::type carry = 0;
        for (size_t n = 0; n < chunks - shift_chunks; n++) {
            result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits);
            carry = (shift_bits == 0) ? 0
                : data[chunks - 1 - n] << (chunk::bits - shift_bits);
        }
        if (Signed && is_neg()) {
            // Fill the bits vacated by the shift with copies of the sign bit. Note that the number
            // of vacated bits is the entire shift amount, not only its sub-chunk part.
            size_t fill_from = (amount.data[0] >= Bits) ? 0 : Bits - amount.data[0];
            size_t top_chunk_idx  = fill_from / chunk::bits;
            size_t top_chunk_bits = fill_from % chunk::bits;
            for (size_t n = top_chunk_idx + 1; n < chunks; n++)
                result.data[n] = chunk::mask;
            if (top_chunk_idx < chunks)
                result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits;
            result.data[result.chunks - 1] &= result.msb_mask;
        }
        return result;
    }

    template<size_t AmountBits>
    value<Bits> sshr(const value<AmountBits> &amount) const {
        return shr<AmountBits, /*Signed=*/true>(amount);
    }
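
    // For example (illustrative only):
    //
    //   value<8> v { 0x90u };
    //   v.shl(value<3> { 1u });    // == 8'h20 (bits shifted out on the left are lost)
    //   v.shr(value<3> { 4u });    // == 8'h09
    //   v.sshr(value<3> { 4u });   // == 8'hf9 (the sign bit is replicated)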

    size_t ctpop() const {
        size_t count = 0;
        for (size_t n = 0; n < chunks; n++) {
            // This loop implements the population count idiom as recognized by LLVM and GCC.
            for (chunk::type x = data[n]; x != 0; count++)
                x = x & (x - 1);
        }
        return count;
    }

    size_t ctlz() const {
        size_t count = 0;
        for (size_t n = 0; n < chunks; n++) {
            chunk::type x = data[chunks - 1 - n];
            // Size of this chunk in bits, taking into account that the topmost chunk may be partial.
            size_t chunk_bits = (n == 0 && Bits % chunk::bits != 0) ? Bits % chunk::bits : chunk::bits;
            if (x == 0) {
                count += chunk_bits;
            } else {
                // This loop implements the find first set idiom as recognized by LLVM; afterwards,
                // `fls` is the 1-based index of the most significant set bit of this chunk, so the
                // chunk contributes its width minus that index to the leading zero count.
                size_t fls = 0;
                for (; x != 0; fls++)
                    x >>= 1;
                count += chunk_bits - fls;
                break;
            }
        }
        return count;
    }
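
    // For example (illustrative only):
    //
    //   value<8> v { 0x1eu };
    //   v.ctpop();   // == 4
    //   v.ctlz();    // == 3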

    template<bool Invert, bool CarryIn>
    std::pair<value<Bits>, bool /*CarryOut*/> alu(const value<Bits> &other) const {
        value<Bits> result;
        bool carry = CarryIn;
        for (size_t n = 0; n < result.chunks; n++) {
            result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry;
            if (result.chunks - 1 == n)
                result.data[result.chunks - 1] &= result.msb_mask;
            carry = (result.data[n] < data[n]) ||
                    (result.data[n] == data[n] && carry);
        }
        return {result, carry};
    }

    value<Bits> add(const value<Bits> &other) const {
        return alu</*Invert=*/false, /*CarryIn=*/false>(other).first;
    }

    value<Bits> sub(const value<Bits> &other) const {
        return alu</*Invert=*/true, /*CarryIn=*/true>(other).first;
    }

    value<Bits> neg() const {
        return value<Bits> { 0u }.sub(*this);
    }
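
    // For example (illustrative only), arithmetic wraps around the value width:
    //
    //   value<8> a { 100u }, b { 200u };
    //   a.add(b);   // == 8'h2c (300 mod 256)
    //   a.sub(b);   // == 8'h9c (-100 mod 256)
    //   b.neg();    // == 8'h38 (-200 mod 256)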

    bool ucmp(const value<Bits> &other) const {
        bool carry;
        std::tie(std::ignore, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
        return !carry; // a.ucmp(b) ≡ a u< b
    }

    bool scmp(const value<Bits> &other) const {
        value<Bits> result;
        bool carry;
        std::tie(result, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
        bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
        return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
    }
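
    // For example (illustrative only):
    //
    //   value<8> a { 0x7fu }, b { 0x80u };
    //   a.ucmp(b);   // == true  (127 u< 128)
    //   b.scmp(a);   // == true  (-128 s< 127)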

    template<size_t ResultBits>
    value<ResultBits> mul(const value<Bits> &other) const {
        value<ResultBits> result;
        wide_chunk_t wide_result[result.chunks + 1] = {};
        for (size_t n = 0; n < chunks; n++) {
            for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
                wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
                wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
                wide_result[n + m] &= chunk::mask;
            }
        }
        for (size_t n = 0; n < result.chunks; n++) {
            result.data[n] = wide_result[n];
        }
        result.data[result.chunks - 1] &= result.msb_mask;
        return result;
    }
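
    // For example (illustrative only), mul() can return a product wider than its operands:
    //
    //   value<8> a { 0xffu }, b { 0xffu };
    //   a.mul<16>(b);   // == 16'hfe01 (255 * 255)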
};

// Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
template<class T, size_t Stop, size_t Start>
struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> {
    static_assert(Stop >= Start, "slice_expr() may not reverse bit order");
    static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds");
    static constexpr size_t bits = Stop - Start + 1;

    T &expr;

    slice_expr(T &expr) : expr(expr) {}
    slice_expr(const slice_expr<T, Stop, Start> &) = delete;

    CXXRTL_ALWAYS_INLINE
    operator value<bits>() const {
        return static_cast<const value<T::bits> &>(expr)
            .template rtrunc<T::bits - Start>()
            .template trunc<bits>();
    }

    CXXRTL_ALWAYS_INLINE
    slice_expr<T, Stop, Start> &operator=(const value<bits> &rhs) {
        // Generic partial assignment implemented using a read-modify-write operation on the sliced expression.
        expr = static_cast<const value<T::bits> &>(expr)
            .template blit<Stop, Start>(rhs);
        return *this;
    }

    // A helper that forces the cast to value<>, which allows deduction to work.
    CXXRTL_ALWAYS_INLINE
    value<bits> val() const {
        return static_cast<const value<bits> &>(*this);
    }
};

// Expression template for a concatenation, usable as lvalue or rvalue, and composable with other expression templates here.
template<class T, class U>
struct concat_expr : public expr_base<concat_expr<T, U>> {
    static constexpr size_t bits = T::bits + U::bits;

    T &ms_expr;
    U &ls_expr;

    concat_expr(T &ms_expr, U &ls_expr) : ms_expr(ms_expr), ls_expr(ls_expr) {}
    concat_expr(const concat_expr<T, U> &) = delete;

    CXXRTL_ALWAYS_INLINE
    operator value<bits>() const {
        value<bits> ms_shifted = static_cast<const value<T::bits> &>(ms_expr)
            .template rzext<bits>();
        value<bits> ls_extended = static_cast<const value<U::bits> &>(ls_expr)
            .template zext<bits>();
        return ms_shifted.bit_or(ls_extended);
    }

    CXXRTL_ALWAYS_INLINE
    concat_expr<T, U> &operator=(const value<bits> &rhs) {
        ms_expr = rhs.template rtrunc<T::bits>();
        ls_expr = rhs.template trunc<U::bits>();
        return *this;
    }

    // A helper that forces the cast to value<>, which allows deduction to work.
    CXXRTL_ALWAYS_INLINE
    value<bits> val() const {
        return static_cast<const value<bits> &>(*this);
    }
};

// Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues.
//
// Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because
// they refer to temporaries that will, in general, only live until the end of the statement. For example, both of
// these snippets perform use-after-free:
//
//   const auto &a = val.slice<7,0>().slice<1>();
//   value<1> b = a;
//
//   auto &&c = val.slice<7,0>().slice<1>();
//   c = value<1>{1u};
//
// An easy way to write code using slices and concatenations safely is to follow two simple rules:
//   * Never explicitly name any type except `value<W>` or `const value<W> &`.
//   * Never use a `const auto &` or `auto &&` in any such expression.
// Then, any code that compiles will be well-defined.
template<class T>
struct expr_base {
    template<size_t Stop, size_t Start = Stop>
    CXXRTL_ALWAYS_INLINE
    slice_expr<const T, Stop, Start> slice() const {
        return {*static_cast<const T *>(this)};
    }

    template<size_t Stop, size_t Start = Stop>
    CXXRTL_ALWAYS_INLINE
    slice_expr<T, Stop, Start> slice() {
        return {*static_cast<T *>(this)};
    }

    template<class U>
    CXXRTL_ALWAYS_INLINE
    concat_expr<const T, typename std::remove_reference<const U>::type> concat(const U &other) const {
        return {*static_cast<const T *>(this), other};
    }

    template<class U>
    CXXRTL_ALWAYS_INLINE
    concat_expr<T, typename std::remove_reference<U>::type> concat(U &&other) {
        return {*static_cast<T *>(this), other};
    }
};
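
// For example (illustrative only), following the rules above:
//
//   value<8> v { 0xa5u };
//   value<4> hi = v.slice<7, 4>().val();                               // == 4'ha
//   value<8> swapped = v.slice<3, 0>().concat(v.slice<7, 4>()).val();  // == 8'h5a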

template<size_t Bits>
std::ostream &operator<<(std::ostream &os, const value<Bits> &val) {
    auto old_flags = os.flags(std::ios::right);
    auto old_width = os.width(0);
    auto old_fill  = os.fill('0');
    os << val.bits << '\'' << std::hex;
    for (size_t n = val.chunks - 1; n != (size_t)-1; n--) {
        if (n == val.chunks - 1 && Bits % value<Bits>::chunk::bits != 0)
            os.width((Bits % value<Bits>::chunk::bits + 3) / 4);
        else
            os.width((value<Bits>::chunk::bits + 3) / 4);
        os << val.data[n];
    }
    os.fill(old_fill);
    os.width(old_width);
    os.flags(old_flags);
    return os;
}

template<size_t Bits>
struct wire {
    static constexpr size_t bits = Bits;

    value<Bits> curr;
    value<Bits> next;

    wire() = default;
    explicit constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
    template<typename... Init>
    explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {}

    // Copying and copy-assigning values is natural. If, however, a value is replaced with a wire,
    // e.g. because a module is built with a different optimization level, then existing code could
    // unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure
    // this doesn't happen, prohibit copying and copy-assigning wires.
    wire(const wire<Bits> &) = delete;
    wire<Bits> &operator=(const wire<Bits> &) = delete;

    wire(wire<Bits> &&) = default;
    wire<Bits> &operator=(wire<Bits> &&) = default;

    template<class IntegerT>
    CXXRTL_ALWAYS_INLINE
    IntegerT get() const {
        return curr.template get<IntegerT>();
    }

    template<class IntegerT>
    CXXRTL_ALWAYS_INLINE
    void set(IntegerT other) {
        next.template set<IntegerT>(other);
    }

    // Commits the buffered next value into the current value. Returns true if the wire has changed.
    bool commit() {
        if (curr != next) {
            curr = next;
            return true;
        }
        return false;
    }
};
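
// For example (illustrative only, assuming the commit() helper above): a wire is double buffered,
// so writes go to `next` and become observable in `curr` only once commit() is called:
//
//   wire<8> w;
//   w.set<uint32_t>(0x42u);
//   w.get<uint32_t>();   // == 0 (still the old value)
//   w.commit();          // == true
//   w.get<uint32_t>();   // == 0x42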

template<size_t Bits>
std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
    os << val.curr;
    return os;
}

template<size_t Width>
struct memory {
    std::vector<value<Width>> data;

    size_t depth() const {
        return data.size();
    }

    memory() = delete;
    explicit memory(size_t depth) : data(depth) {}

    memory(const memory<Width> &) = delete;
    memory<Width> &operator=(const memory<Width> &) = delete;

    memory(memory<Width> &&) = default;
    memory<Width> &operator=(memory<Width> &&) = default;

    // The only way to get the compiler to put the initializer in .rodata and not copy it on the stack is to stuff it
    // into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
    // construct an initializer_list in a constexpr (or something) and so if you try to do that the whole thing is
    // first copied on the stack (probably overflowing it) and then again into `data`.
    template<size_t Size>
    struct init {
        size_t offset;
        value<Width> data[Size];
    };

    template<size_t... InitSize>
    explicit memory(size_t depth, const init<InitSize> &...init) : data(depth) {
        // This utterly reprehensible construct is the most reasonable way to apply a function to every element
        // of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list.
        auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
        (void)_;
    }

    // An operator for direct memory reads. May be used at any time during the simulation.
    const value<Width> &operator [](size_t index) const {
        assert(index < data.size());
        return data[index];
    }

    // An operator for direct memory writes. May only be used before the simulation is started. If used
    // after the simulation is started, the design may malfunction.
    value<Width> &operator [](size_t index) {
        assert(index < data.size());
        return data[index];
    }

    // A simple way to make a writable memory would be to use an array of wires instead of an array of values.
    // However, there are two significant downsides to this approach: first, it has large overhead (2× space
    // overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
    // priorities. Although in principle write ports could be ordered or conditionally enabled in generated
    // code based on their priorities and selected addresses, the feedback arc set problem is computationally
    // expensive, and the heuristic based algorithms are not easily modified to guarantee (rather than prefer)
    // a particular write port evaluation order.
    //
    // The approach used here instead is to queue writes into a buffer during the eval phase, then perform
    // the writes during the commit phase in priority order. This approach has low overhead, with both space
    // and time proportional to the number of write ports. Because virtually every memory in a practical design
    // has at most two write ports, linear search is used on every write, being the fastest and simplest approach.
    struct write {
        size_t index;
        value<Width> val;
        value<Width> mask;
        int priority;
    };
    std::vector<write> write_queue;

    void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
        assert(index < data.size());
        // Queue up the write while keeping the queue sorted by priority.
        write_queue.insert(
            std::upper_bound(write_queue.begin(), write_queue.end(), priority,
                [](const int a, const write &b) { return a < b.priority; }),
            write { index, val, mask, priority });
    }

    bool commit() {
        bool changed = false;
        for (const write &entry : write_queue) {
            value<Width> elem = data[entry.index];
            elem = elem.update(entry.val, entry.mask);
            changed |= (data[entry.index] != elem);
            data[entry.index] = elem;
        }
        write_queue.clear();
        return changed;
    }
};

struct metadata {
    const enum {
        MISSING = 0,
        UINT    = 1,
        SINT    = 2,
        STRING  = 3,
        DOUBLE  = 4,
    } value_type;

    // In debug mode, using the wrong .as_*() function will assert.
    // In release mode, using the wrong .as_*() function will safely return a default value.
    const unsigned    uint_value   = 0;
    const signed      sint_value   = 0;
    const std::string string_value = "";
    const double      double_value = 0.0;

    metadata() : value_type(MISSING) {}
    metadata(unsigned value) : value_type(UINT), uint_value(value) {}
    metadata(signed value) : value_type(SINT), sint_value(value) {}
    metadata(const std::string &value) : value_type(STRING), string_value(value) {}
    metadata(const char *value) : value_type(STRING), string_value(value) {}
    metadata(double value) : value_type(DOUBLE), double_value(value) {}

    metadata(const metadata &) = default;
    metadata &operator=(const metadata &) = delete;

    unsigned as_uint() const {
        assert(value_type == UINT);
        return uint_value;
    }

    signed as_sint() const {
        assert(value_type == SINT);
        return sint_value;
    }

    const std::string &as_string() const {
        assert(value_type == STRING);
        return string_value;
    }

    double as_double() const {
        assert(value_type == DOUBLE);
        return double_value;
    }
};
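
// For example (illustrative only):
//
//   metadata m(42u);
//   m.as_uint();   // == 42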

typedef std::map<std::string, metadata> metadata_map;

// Tag class to disambiguate values/wires and their aliases.
struct debug_alias {};

// Tag declaration to disambiguate values and debug outlines.
using debug_outline = ::_cxxrtl_outline;

// This structure is intended for consumption via foreign function interfaces, like Python's ctypes.
// Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++.
//
// To avoid violating strict aliasing rules, this structure has to be a subclass of the one used
// in the C API, or it would not be possible to cast between the pointers to these.
struct debug_item : ::cxxrtl_object {
    // Object types.
    enum : uint32_t {
        VALUE   = CXXRTL_VALUE,
        WIRE    = CXXRTL_WIRE,
        MEMORY  = CXXRTL_MEMORY,
        ALIAS   = CXXRTL_ALIAS,
        OUTLINE = CXXRTL_OUTLINE,
    };

    // Object flags.
    enum : uint32_t {
        INPUT       = CXXRTL_INPUT,
        OUTPUT      = CXXRTL_OUTPUT,
        INOUT       = CXXRTL_INOUT,
        DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC,
        DRIVEN_COMB = CXXRTL_DRIVEN_COMB,
        UNDRIVEN    = CXXRTL_UNDRIVEN,
    };

    debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {}

    template<size_t Bits>
    debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
        static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
                      "value<Bits> is not compatible with C layout");
        type    = VALUE;
        flags   = flags_;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = item.data;
        next    = item.data;
    }

    template<size_t Bits>
    debug_item(const value<Bits> &item, size_t lsb_offset = 0) {
        static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
                      "value<Bits> is not compatible with C layout");
        type    = VALUE;
        flags   = DRIVEN_COMB;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = const_cast<chunk_t *>(item.data);
        next    = nullptr;
    }

    template<size_t Bits>
    debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
        static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
                      sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
                      "wire<Bits> is not compatible with C layout");
        type    = WIRE;
        flags   = flags_;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = item.curr.data;
        next    = item.next.data;
    }

    template<size_t Width>
    debug_item(memory<Width> &item, size_t zero_offset = 0) {
        static_assert(sizeof(item.data[0]) == value<Width>::chunks * sizeof(chunk_t),
                      "memory<Width> is not compatible with C layout");
        type    = MEMORY;
        flags   = 0;
        width   = Width;
        lsb_at  = 0;
        depth   = item.data.size();
        zero_at = zero_offset;
        curr    = item.data.empty() ? nullptr : item.data[0].data;
        next    = nullptr;
    }

    template<size_t Bits>
    debug_item(debug_alias, const value<Bits> &item, size_t lsb_offset = 0) {
        static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
                      "value<Bits> is not compatible with C layout");
        type    = ALIAS;
        flags   = DRIVEN_COMB;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = const_cast<chunk_t *>(item.data);
        next    = nullptr;
    }

    template<size_t Bits>
    debug_item(debug_alias, const wire<Bits> &item, size_t lsb_offset = 0) {
        static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
                      sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
                      "wire<Bits> is not compatible with C layout");
        type    = ALIAS;
        flags   = DRIVEN_COMB;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = const_cast<chunk_t *>(item.curr.data);
        next    = nullptr;
    }

    template<size_t Bits>
    debug_item(debug_outline &group, const value<Bits> &item, size_t lsb_offset = 0) {
        static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
                      "value<Bits> is not compatible with C layout");
        type    = OUTLINE;
        flags   = DRIVEN_COMB;
        width   = Bits;
        lsb_at  = lsb_offset;
        depth   = 1;
        zero_at = 0;
        curr    = const_cast<chunk_t *>(item.data);
        next    = nullptr;
        outline = &group;
    }
};

static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout");

struct debug_items {
    std::map<std::string, std::vector<debug_item>> table;

    void add(const std::string &name, debug_item &&item) {
        std::vector<debug_item> &parts = table[name];
        parts.emplace_back(item);
        std::sort(parts.begin(), parts.end(),
            [](const debug_item &a, const debug_item &b) {
                return a.lsb_at < b.lsb_at;
            });
    }

    size_t count(const std::string &name) const {
        if (table.count(name) == 0)
            return 0;
        return table.at(name).size();
    }

    const std::vector<debug_item> &parts_at(const std::string &name) const {
        return table.at(name);
    }

    const debug_item &at(const std::string &name) const {
        const std::vector<debug_item> &parts = table.at(name);
        assert(parts.size() == 1);
        return parts.at(0);
    }

    const debug_item &operator [](const std::string &name) const {
        return at(name);
    }
};
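
// For example (an illustrative sketch; the generated design class name below is hypothetical):
//
//   cxxrtl_design::p_top top;
//   cxxrtl::debug_items items;
//   top.debug_info(items);
//   for (auto &it : items.table)
//     std::cout << it.first << " has " << it.second.size() << " part(s)\n";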

// Tag class to disambiguate module move constructor and module constructor that takes black boxes
// out of another instance of the module.
struct adopt {};

struct module {
    module() {}
    virtual ~module() {}

    // Modules with black boxes cannot be copied. Although not all designs include black boxes,
    // delete the copy constructor and copy assignment operator to make sure that any downstream
    // code that manipulates modules doesn't accidentally depend on their availability.
    module(const module &) = delete;
    module &operator=(const module &) = delete;

    module(module &&) = default;
    module &operator=(module &&) = default;

    virtual void reset() = 0;

    virtual bool eval() = 0;
    virtual bool commit() = 0;

    size_t step() {
        size_t deltas = 0;
        bool converged = false;
        do {
            converged = eval();
            deltas++;
        } while (commit() && !converged);
        return deltas;
    }

    virtual void debug_info(debug_items &items, std::string path = "") {
        (void)items, (void)path;
    }
};

} // namespace cxxrtl

// Internal structures used to communicate with the implementation of the C interface.

typedef struct _cxxrtl_toplevel {
    std::unique_ptr<cxxrtl::module> module;
} *cxxrtl_toplevel;

typedef struct _cxxrtl_outline {
    std::function<void()> eval;
} *cxxrtl_outline;

// Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic
// and independent of Yosys implementation details.
//
// The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these
// functions. All of Yosys arithmetic and logical cells perform sign or zero extension on their operands,
// whereas basic operations on arbitrary width values require operands to be of the same width. These functions
// bridge the gap by performing the necessary casts. They are named similarly to `cell_A[B]`, where A and B are `u`
// if the corresponding operand is unsigned, and `s` if it is signed.
namespace cxxrtl_yosys {

using namespace cxxrtl;
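
// For example (illustrative only), an 8-bit unsigned addition of two 4-bit unsigned operands,
// as `write_cxxrtl` would emit it for an $add cell:
//
//   value<4> a { 0x9u };
//   value<4> b { 0x3u };
//   add_uu<8>(a, b);   // == 8'h0c; both operands are zero-extended to 8 bits first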

// std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own.
template<class T>
CXXRTL_ALWAYS_INLINE
constexpr T max(const T &a, const T &b) {
    return a > b ? a : b;
}

// Logic operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_not(const value<BitsA> &a) {
    return value<BitsY> { a ? 0u : 1u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_and(const value<BitsA> &a, const value<BitsB> &b) {
    return value<BitsY> { (bool(a) && bool(b)) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_or(const value<BitsA> &a, const value<BitsB> &b) {
    return value<BitsY> { (bool(a) || bool(b)) ? 1u : 0u };
}

// Reduction operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_and(const value<BitsA> &a) {
    return value<BitsY> { a.bit_not().is_zero() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_or(const value<BitsA> &a) {
    return value<BitsY> { a ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_xor(const value<BitsA> &a) {
    return value<BitsY> { (a.ctpop() % 2) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_xnor(const value<BitsA> &a) {
    return value<BitsY> { (a.ctpop() % 2) ? 0u : 1u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_bool(const value<BitsA> &a) {
    return value<BitsY> { a ? 1u : 0u };
}

// Bitwise operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> not_u(const value<BitsA> &a) {
    return a.template zcast<BitsY>().bit_not();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> not_s(const value<BitsA> &a) {
    return a.template scast<BitsY>().bit_not();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> and_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().bit_and(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> and_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().bit_and(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> or_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().bit_or(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> or_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().bit_or(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xor_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xor_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xnor_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()).bit_not();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()).bit_not();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.shr(b).template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
    return a.shr(b).template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.shr(b).template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
    return a.sshr(b).template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return shr_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_su(const value<BitsA> &a, const value<BitsB> &b) {
    return shr_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_us(const value<BitsA> &a, const value<BitsB> &b) {
    return b.is_neg() ? shl_uu<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return b.is_neg() ? shl_su<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return shift_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_su(const value<BitsA> &a, const value<BitsB> &b) {
    return shift_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_us(const value<BitsA> &a, const value<BitsB> &b) {
    return shift_us<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return shift_ss<BitsY>(a, b);
}

// Comparison operations
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eq_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY>{ a.template zext<BitsExt>() == b.template zext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eq_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY>{ a.template sext<BitsExt>() == b.template sext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ne_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY>{ a.template zext<BitsExt>() != b.template zext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ne_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY>{ a.template sext<BitsExt>() != b.template sext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eqx_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return eq_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eqx_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return eq_ss<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> nex_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return ne_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> nex_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return ne_ss<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> gt_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> gt_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ge_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { !a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ge_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { !a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> lt_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> lt_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> le_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { !b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsExt = max(BitsA, BitsB);
    return value<BitsY> { !b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
}

// Arithmetic operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> pos_u(const value<BitsA> &a) {
    return a.template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> pos_s(const value<BitsA> &a) {
    return a.template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> neg_u(const value<BitsA> &a) {
    return a.template zcast<BitsY>().neg();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> neg_s(const value<BitsA> &a) {
    return a.template scast<BitsY>().neg();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> add_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().add(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> add_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().add(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sub_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template zcast<BitsY>().sub(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().sub(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
    return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const value<BitsB> &b) {
    constexpr size_t Bits = max(BitsY, max(BitsA, BitsB));
    value<Bits> quotient;
    value<Bits> dividend = a.template zext<Bits>();
    value<Bits> divisor  = b.template zext<Bits>();
    if (dividend.ucmp(divisor))
        return {/*quotient=*/value<BitsY> { 0u }, /*remainder=*/dividend.template trunc<BitsY>()};
    // Align the most significant set bit of the divisor with that of the dividend. Since the
    // dividend is at least as large as the divisor here, the shift amount is non-negative.
    uint32_t divisor_shift = divisor.ctlz() - dividend.ctlz();
    divisor = divisor.shl(value<32> { divisor_shift });
    for (size_t step = 0; step <= divisor_shift; step++) {
        quotient = quotient.shl(value<1> { 1u });
        if (!dividend.ucmp(divisor)) {
            dividend = dividend.sub(divisor);
            quotient.set_bit(0, true);
        }
        divisor = divisor.shr(value<1> { 1u });
    }
    return {quotient.template trunc<BitsY>(), /*remainder=*/dividend.template trunc<BitsY>()};
}
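
// For example (illustrative only):
//
//   divmod_uu<8>(value<8> { 100u }, value<8> { 7u });   // == {8'd14, 8'd2}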

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const value<BitsB> &b) {
    value<BitsA + 1> ua = a.template sext<BitsA + 1>();
    value<BitsB + 1> ub = b.template sext<BitsB + 1>();
    if (ua.is_neg()) ua = ua.neg();
    if (ub.is_neg()) ub = ub.neg();
    value<BitsY> y, r;
    std::tie(y, r) = divmod_uu<BitsY>(ua, ub);
    if (a.is_neg() != b.is_neg()) y = y.neg();
    if (a.is_neg()) r = r.neg();
    return {y, r};
}
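
// For example (illustrative only), the remainder takes the sign of the dividend:
//
//   divmod_ss<8>(value<8> { 0x9cu }, value<8> { 7u });   // -100 / 7 == {8'hf2, 8'hfe}, i.e. {-14, -2}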

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> div_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return divmod_uu<BitsY>(a, b).first;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> div_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return divmod_ss<BitsY>(a, b).first;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mod_uu(const value<BitsA> &a, const value<BitsB> &b) {
    return divmod_uu<BitsY>(a, b).second;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
    return divmod_ss<BitsY>(a, b).second;
}

struct memory_index {
    bool valid;
    size_t index;

    template<size_t BitsAddr>
    memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
        static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
        size_t offset_index = addr.data[0];

        valid = (offset_index >= offset && offset_index < offset + depth);
        index = offset_index - offset;
    }
};
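
// For example (illustrative only): with a memory mapped at offset 8 with depth 8, address 10
// maps to element 2:
//
//   memory_index idx(value<4> { 0xau }, /*offset=*/8, /*depth=*/8);
//   // idx.valid == true, idx.index == 2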

} // namespace cxxrtl_yosys