/*
 *  yosys -- Yosys Open SYnthesis Suite
 *
 *  Copyright (C) 2019-2020  whitequark <whitequark@whitequark.org>
 *
 *  Permission to use, copy, modify, and/or distribute this software for any
 *  purpose with or without fee is hereby granted.
 *
 *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
// This file is included by the designs generated with `write_cxxrtl`. It is not used in Yosys itself.
//
// The CXXRTL support library implements compile-time specialized arbitrary-width arithmetic, and provides
// composite lvalues made out of bit slices and concatenations of lvalues. This allows the `write_cxxrtl` pass
// to perform a straightforward translation of RTLIL structures to readable C++, relying on the C++ compiler
// to unwrap the abstraction and generate efficient code.
#include <cstddef>
#include <cstdint>
#include <cassert>
#include <limits>
#include <type_traits>
#include <tuple>
#include <vector>
#include <map>
#include <algorithm>
#include <memory>
#include <functional>
#include <sstream>

#include <backends/cxxrtl/cxxrtl_capi.h>
// CXXRTL essentially uses the C++ compiler as a hygienic macro engine that feeds an instruction selector.
// It generates a lot of specialized template functions with relatively large bodies that, when inlined
// into the caller and (for those with loops) unrolled, often expose many new optimization opportunities.
// Because of this, most of the CXXRTL runtime must always be inlined for best performance.
#ifndef __has_attribute
#	define __has_attribute(x) 0
#endif
#if __has_attribute(always_inline)
#define CXXRTL_ALWAYS_INLINE inline __attribute__((__always_inline__))
#else
#define CXXRTL_ALWAYS_INLINE inline
#endif
// CXXRTL uses assert() to check for C++ contract violations (which may result in e.g. undefined behavior
// of the simulation code itself), and CXXRTL_ASSERT to check for RTL contract violations (which may at
// most result in undefined simulation results).
//
// Though by default CXXRTL_ASSERT() expands to assert(), it may be overridden e.g. when integrating
// the simulation into another process that should survive violating RTL contracts.
#ifndef CXXRTL_ASSERT
#ifndef CXXRTL_NDEBUG
#define CXXRTL_ASSERT(x) assert(x)
#else
#define CXXRTL_ASSERT(x)
#endif
#endif

namespace cxxrtl {
// All arbitrary-width values in CXXRTL are backed by arrays of unsigned integers called chunks. The chunk size
// is the same regardless of the value width to simplify manipulating values via FFI interfaces, e.g. driving
// and introspecting the simulation in Python.
//
// It is practical to use chunk sizes between 32 bits and platform register size because when arithmetic on
// narrower integer types is legalized by the C++ compiler, it inserts code to clear the high bits of the register.
// However, (a) most of our operations do not change those bits in the first place because of invariants that are
// invisible to the compiler, and (b) we often operate on non-power-of-2 values and have to clear the high bits anyway.
// Therefore, using relatively wide chunks and clearing the high bits explicitly and only when we know they may be
// clobbered results in simpler generated code.
typedef uint32_t chunk_t;
typedef uint64_t wide_chunk_t;
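// For example (an illustrative sketch, not part of the library): with 32-bit chunks, a 40-bit value
// (see value<> below) occupies two chunks. Bits [31:0] live in data[0] and bits [39:32] in the low
// 8 bits of data[1]; the remaining 24 bits of data[1] are kept cleared so FFI consumers can read the
// chunk array directly.
//
//   value<40> v;                        // v.chunks == 2, v.msb_mask == 0x000000ff
//   v.set<uint64_t>(0x123456789aull);   // v.data[0] == 0x3456789a, v.data[1] == 0x00000012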
template<typename T>
struct chunk_traits {
	static_assert(std::is_integral<T>::value && std::is_unsigned<T>::value,
	              "chunk type must be an unsigned integral type");
	using type = T;

	static constexpr size_t bits = std::numeric_limits<T>::digits;
	static constexpr T mask = std::numeric_limits<T>::max();
};
template<class T>
struct expr_base;

template<size_t Bits>
struct value : public expr_base<value<Bits>> {
	static constexpr size_t bits = Bits;

	using chunk = chunk_traits<chunk_t>;
	static constexpr chunk::type msb_mask = (Bits % chunk::bits == 0) ? chunk::mask
		: chunk::mask >> (chunk::bits - (Bits % chunk::bits));

	static constexpr size_t chunks = (Bits + chunk::bits - 1) / chunk::bits;
	chunk::type data[chunks] = {};
	value() = default;
	template<typename... Init>
	explicit constexpr value(Init ...init) : data{init...} {}

	value(const value<Bits> &) = default;
	value<Bits> &operator=(const value<Bits> &) = default;

	value(value<Bits> &&) = default;
	value<Bits> &operator=(value<Bits> &&) = default;
	// A (no-op) helper that forces the cast to value<>.
	CXXRTL_ALWAYS_INLINE
	const value<Bits> &val() const {
		return *this;
	}

	std::string str() const {
		std::stringstream ss;
		ss << *this;
		return ss.str();
	}
	// Conversion operations.
	//
	// These functions ensure that a conversion is never out of range, and should always be used, if at all
	// possible, instead of direct manipulation of the `data` member. For very large types, .slice() and
	// .concat() can be used to split them into more manageable parts.
	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	IntegerT get() const {
		static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
		              "get<T>() requires T to be an unsigned integral type");
		static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
		              "get<T>() requires T to be at least as wide as the value is");
		IntegerT result = 0;
		for (size_t n = 0; n < chunks; n++)
			result |= IntegerT(data[n]) << (n * chunk::bits);
		return result;
	}
	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	void set(IntegerT other) {
		static_assert(std::numeric_limits<IntegerT>::is_integer && !std::numeric_limits<IntegerT>::is_signed,
		              "set<T>() requires T to be an unsigned integral type");
		static_assert(std::numeric_limits<IntegerT>::digits >= Bits,
		              "set<T>() requires the value to be at least as wide as T is");
		for (size_t n = 0; n < chunks; n++)
			data[n] = (other >> (n * chunk::bits)) & chunk::mask;
	}
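	// Example (an illustrative sketch, not part of the library): moving a native integer in and out
	// of a value<> without touching `data` directly.
	//
	//   value<16> v;
	//   v.set<uint16_t>(0xbeef);
	//   uint16_t x = v.get<uint16_t>(); // x == 0xbeef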
	// Operations with compile-time parameters.
	//
	// These operations are used to implement slicing, concatenation, and blitting.
	// The trunc, zext and sext operations add or remove most significant bits (i.e. on the left);
	// the rtrunc and rzext operations add or remove least significant bits (i.e. on the right).
	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> trunc() const {
		static_assert(NewBits <= Bits, "trunc() may not increase width");
		value<NewBits> result;
		for (size_t n = 0; n < result.chunks; n++)
			result.data[n] = data[n];
		result.data[result.chunks - 1] &= result.msb_mask;
		return result;
	}
	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> zext() const {
		static_assert(NewBits >= Bits, "zext() may not decrease width");
		value<NewBits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = data[n];
		return result;
	}
	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> sext() const {
		static_assert(NewBits >= Bits, "sext() may not decrease width");
		value<NewBits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = data[n];
		if (is_neg()) {
			result.data[chunks - 1] |= ~msb_mask;
			for (size_t n = chunks; n < result.chunks; n++)
				result.data[n] = chunk::mask;
			result.data[result.chunks - 1] &= result.msb_mask;
		}
		return result;
	}
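	// Example (an illustrative sketch, not part of the library): zero vs. sign extension.
	//
	//   value<4> a {0xau};         // 0b1010; negative when interpreted as signed
	//   value<8> z = a.zext<8>();  // 0x0a
	//   value<8> s = a.sext<8>();  // 0xfa (sign bit replicated on the left)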
	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> rtrunc() const {
		static_assert(NewBits <= Bits, "rtrunc() may not increase width");
		value<NewBits> result;
		constexpr size_t shift_chunks = (Bits - NewBits) / chunk::bits;
		constexpr size_t shift_bits   = (Bits - NewBits) % chunk::bits;
		chunk::type carry = 0;
		if (shift_chunks + result.chunks < chunks) {
			carry = (shift_bits == 0) ? 0
				: data[shift_chunks + result.chunks] << (chunk::bits - shift_bits);
		}
		for (size_t n = result.chunks; n > 0; n--) {
			result.data[n - 1] = carry | (data[shift_chunks + n - 1] >> shift_bits);
			carry = (shift_bits == 0) ? 0
				: data[shift_chunks + n - 1] << (chunk::bits - shift_bits);
		}
		return result;
	}
	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> rzext() const {
		static_assert(NewBits >= Bits, "rzext() may not decrease width");
		value<NewBits> result;
		constexpr size_t shift_chunks = (NewBits - Bits) / chunk::bits;
		constexpr size_t shift_bits   = (NewBits - Bits) % chunk::bits;
		chunk::type carry = 0;
		for (size_t n = 0; n < chunks; n++) {
			result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
			carry = (shift_bits == 0) ? 0
				: data[n] >> (chunk::bits - shift_bits);
		}
		if (shift_chunks + chunks < result.chunks)
			result.data[shift_chunks + chunks] = carry;
		return result;
	}
	// Bit blit operation, i.e. a partial read-modify-write.
	template<size_t Stop, size_t Start>
	CXXRTL_ALWAYS_INLINE
	value<Bits> blit(const value<Stop - Start + 1> &source) const {
		static_assert(Stop >= Start, "blit() may not reverse bit order");
		constexpr chunk::type start_mask = ~(chunk::mask << (Start % chunk::bits));
		constexpr chunk::type stop_mask = (Stop % chunk::bits + 1 == chunk::bits) ? 0
			: (chunk::mask << (Stop % chunk::bits + 1));
		value<Bits> masked = *this;
		if (Start / chunk::bits == Stop / chunk::bits) {
			masked.data[Start / chunk::bits] &= stop_mask | start_mask;
		} else {
			masked.data[Start / chunk::bits] &= start_mask;
			for (size_t n = Start / chunk::bits + 1; n < Stop / chunk::bits; n++)
				masked.data[n] = 0;
			masked.data[Stop / chunk::bits] &= stop_mask;
		}
		value<Bits> shifted = source
			.template rzext<Stop + 1>()
			.template zext<Bits>();
		return masked.bit_or(shifted);
	}
	// Helpers for selecting extending or truncating operation depending on whether the result is wider or narrower
	// than the operand. In C++17 these can be replaced with `if constexpr`.
	template<size_t NewBits, typename = void>
	struct zext_cast {
		CXXRTL_ALWAYS_INLINE
		value<NewBits> operator()(const value<Bits> &val) {
			return val.template zext<NewBits>();
		}
	};

	template<size_t NewBits>
	struct zext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
		CXXRTL_ALWAYS_INLINE
		value<NewBits> operator()(const value<Bits> &val) {
			return val.template trunc<NewBits>();
		}
	};

	template<size_t NewBits, typename = void>
	struct sext_cast {
		CXXRTL_ALWAYS_INLINE
		value<NewBits> operator()(const value<Bits> &val) {
			return val.template sext<NewBits>();
		}
	};

	template<size_t NewBits>
	struct sext_cast<NewBits, typename std::enable_if<(NewBits < Bits)>::type> {
		CXXRTL_ALWAYS_INLINE
		value<NewBits> operator()(const value<Bits> &val) {
			return val.template trunc<NewBits>();
		}
	};

	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> zcast() const {
		return zext_cast<NewBits>()(*this);
	}

	template<size_t NewBits>
	CXXRTL_ALWAYS_INLINE
	value<NewBits> scast() const {
		return sext_cast<NewBits>()(*this);
	}
	// Operations with run-time parameters (offsets, amounts, etc).
	//
	// These operations are used for computations.
	bool bit(size_t offset) const {
		return data[offset / chunk::bits] & (1 << (offset % chunk::bits));
	}

	void set_bit(size_t offset, bool value = true) {
		size_t offset_chunks = offset / chunk::bits;
		size_t offset_bits = offset % chunk::bits;
		data[offset_chunks] &= ~(1 << offset_bits);
		data[offset_chunks] |= value ? 1 << offset_bits : 0;
	}
	explicit operator bool() const {
		return !is_zero();
	}

	bool is_zero() const {
		for (size_t n = 0; n < chunks; n++)
			if (data[n] != 0)
				return false;
		return true;
	}

	bool is_neg() const {
		return data[chunks - 1] & (1 << ((Bits - 1) % chunk::bits));
	}
	bool operator ==(const value<Bits> &other) const {
		for (size_t n = 0; n < chunks; n++)
			if (data[n] != other.data[n])
				return false;
		return true;
	}

	bool operator !=(const value<Bits> &other) const {
		return !(*this == other);
	}
	value<Bits> bit_not() const {
		value<Bits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = ~data[n];
		result.data[chunks - 1] &= msb_mask;
		return result;
	}

	value<Bits> bit_and(const value<Bits> &other) const {
		value<Bits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = data[n] & other.data[n];
		return result;
	}

	value<Bits> bit_or(const value<Bits> &other) const {
		value<Bits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = data[n] | other.data[n];
		return result;
	}

	value<Bits> bit_xor(const value<Bits> &other) const {
		value<Bits> result;
		for (size_t n = 0; n < chunks; n++)
			result.data[n] = data[n] ^ other.data[n];
		return result;
	}

	value<Bits> update(const value<Bits> &val, const value<Bits> &mask) const {
		return bit_and(mask.bit_not()).bit_or(val.bit_and(mask));
	}
	template<size_t AmountBits>
	value<Bits> shl(const value<AmountBits> &amount) const {
		// Ensure our early return is correct by prohibiting values larger than 4 Gbit.
		static_assert(Bits <= chunk::mask, "shl() of unreasonably large values is not supported");
		// Detect shifts definitely larger than Bits early.
		for (size_t n = 1; n < amount.chunks; n++)
			if (amount.data[n] != 0)
				return {};
		// Past this point we can use the least significant chunk as the shift size.
		size_t shift_chunks = amount.data[0] / chunk::bits;
		size_t shift_bits = amount.data[0] % chunk::bits;
		if (shift_chunks >= chunks)
			return {};
		value<Bits> result;
		chunk::type carry = 0;
		for (size_t n = 0; n < chunks - shift_chunks; n++) {
			result.data[shift_chunks + n] = (data[n] << shift_bits) | carry;
			carry = (shift_bits == 0) ? 0
				: data[n] >> (chunk::bits - shift_bits);
		}
		result.data[result.chunks - 1] &= result.msb_mask;
		return result;
	}
	template<size_t AmountBits, bool Signed = false>
	value<Bits> shr(const value<AmountBits> &amount) const {
		// Ensure our early return is correct by prohibiting values larger than 4 Gbit.
		static_assert(Bits <= chunk::mask, "shr() of unreasonably large values is not supported");
		// Detect shifts definitely larger than Bits early.
		for (size_t n = 1; n < amount.chunks; n++)
			if (amount.data[n] != 0)
				return {};
		// Past this point we can use the least significant chunk as the shift size.
		size_t shift_chunks = amount.data[0] / chunk::bits;
		size_t shift_bits = amount.data[0] % chunk::bits;
		if (shift_chunks >= chunks)
			return {};
		value<Bits> result;
		chunk::type carry = 0;
		for (size_t n = 0; n < chunks - shift_chunks; n++) {
			result.data[chunks - shift_chunks - 1 - n] = carry | (data[chunks - 1 - n] >> shift_bits);
			carry = (shift_bits == 0) ? 0
				: data[chunks - 1 - n] << (chunk::bits - shift_bits);
		}
		if (Signed && is_neg()) {
			size_t top_chunk_idx = (Bits - shift_bits) / chunk::bits;
			size_t top_chunk_bits = (Bits - shift_bits) % chunk::bits;
			for (size_t n = top_chunk_idx + 1; n < chunks; n++)
				result.data[n] = chunk::mask;
			if (shift_bits != 0)
				result.data[top_chunk_idx] |= chunk::mask << top_chunk_bits;
			result.data[result.chunks - 1] &= result.msb_mask;
		}
		return result;
	}

	template<size_t AmountBits>
	value<Bits> sshr(const value<AmountBits> &amount) const {
		return shr<AmountBits, /*Signed=*/true>(amount);
	}
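	// Example (an illustrative sketch, not part of the library): logical vs. arithmetic right shift.
	//
	//   value<8> v {0x90u};                  // 0b10010000, MSB set
	//   value<8> l = v.shr(value<3> {4u});   // l == 0x09
	//   value<8> a = v.sshr(value<3> {4u});  // a == 0xf9 (sign-filled)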
	size_t ctpop() const {
		size_t count = 0;
		for (size_t n = 0; n < chunks; n++) {
			// This loop implements the population count idiom as recognized by LLVM and GCC.
			for (chunk::type x = data[n]; x != 0; count++)
				x = x & (x - 1);
		}
		return count;
	}
	size_t ctlz() const {
		size_t count = 0;
		for (size_t n = 0; n < chunks; n++) {
			chunk::type x = data[chunks - 1 - n];
			if (x == 0) {
				count += (n == 0 ? Bits % chunk::bits : chunk::bits);
			} else {
				// This loop implements the find first set idiom as recognized by LLVM.
				for (; x != 0; count++)
					x >>= 1;
				break;
			}
		}
		return count;
	}
	template<bool Invert, bool CarryIn>
	std::pair<value<Bits>, bool /*CarryOut*/> alu(const value<Bits> &other) const {
		value<Bits> result;
		bool carry = CarryIn;
		for (size_t n = 0; n < result.chunks; n++) {
			result.data[n] = data[n] + (Invert ? ~other.data[n] : other.data[n]) + carry;
			if (result.chunks - 1 == n)
				result.data[result.chunks - 1] &= result.msb_mask;
			carry = (result.data[n] < data[n]) ||
			        (result.data[n] == data[n] && carry);
		}
		return {result, carry};
	}
	value<Bits> add(const value<Bits> &other) const {
		return alu</*Invert=*/false, /*CarryIn=*/false>(other).first;
	}

	value<Bits> sub(const value<Bits> &other) const {
		return alu</*Invert=*/true, /*CarryIn=*/true>(other).first;
	}

	value<Bits> neg() const {
		return value<Bits> { 0u }.sub(*this);
	}
	bool ucmp(const value<Bits> &other) const {
		bool carry;
		std::tie(std::ignore, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
		return !carry; // a.ucmp(b) ≡ a u< b
	}

	bool scmp(const value<Bits> &other) const {
		value<Bits> result;
		bool carry;
		std::tie(result, carry) = alu</*Invert=*/true, /*CarryIn=*/true>(other);
		bool overflow = (is_neg() == !other.is_neg()) && (is_neg() != result.is_neg());
		return result.is_neg() ^ overflow; // a.scmp(b) ≡ a s< b
	}
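	// Example (an illustrative sketch, not part of the library): the same bit pattern compares
	// differently under unsigned and signed ordering.
	//
	//   value<4> a {0x7u};   // 7 unsigned, +7 signed
	//   value<4> b {0x8u};   // 8 unsigned, -8 signed
	//   bool u = a.ucmp(b);  // true:  7 u< 8
	//   bool s = b.scmp(a);  // true: -8 s< +7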
	template<size_t ResultBits>
	value<ResultBits> mul(const value<Bits> &other) const {
		value<ResultBits> result;
		wide_chunk_t wide_result[result.chunks + 1] = {};
		for (size_t n = 0; n < chunks; n++) {
			for (size_t m = 0; m < chunks && n + m < result.chunks; m++) {
				wide_result[n + m] += wide_chunk_t(data[n]) * wide_chunk_t(other.data[m]);
				wide_result[n + m + 1] += wide_result[n + m] >> chunk::bits;
				wide_result[n + m] &= chunk::mask;
			}
		}
		for (size_t n = 0; n < result.chunks; n++) {
			result.data[n] = wide_result[n];
		}
		result.data[result.chunks - 1] &= result.msb_mask;
		return result;
	}
};
// Expression template for a slice, usable as lvalue or rvalue, and composable with other expression templates here.
template<class T, size_t Stop, size_t Start>
struct slice_expr : public expr_base<slice_expr<T, Stop, Start>> {
	static_assert(Stop >= Start, "slice_expr() may not reverse bit order");
	static_assert(Start < T::bits && Stop < T::bits, "slice_expr() must be within bounds");
	static constexpr size_t bits = Stop - Start + 1;

	T &expr;

	slice_expr(T &expr) : expr(expr) {}
	slice_expr(const slice_expr<T, Stop, Start> &) = delete;

	CXXRTL_ALWAYS_INLINE
	operator value<bits>() const {
		return static_cast<const value<T::bits> &>(expr)
			.template rtrunc<T::bits - Start>()
			.template trunc<bits>();
	}

	CXXRTL_ALWAYS_INLINE
	slice_expr<T, Stop, Start> &operator=(const value<bits> &rhs) {
		// Generic partial assignment implemented using a read-modify-write operation on the sliced expression.
		expr = static_cast<const value<T::bits> &>(expr)
			.template blit<Stop, Start>(rhs);
		return *this;
	}

	// A helper that forces the cast to value<>, which allows deduction to work.
	CXXRTL_ALWAYS_INLINE
	value<bits> val() const {
		return static_cast<const value<bits> &>(*this);
	}
};
560 template<class T
, class U
>
561 struct concat_expr
: public expr_base
<concat_expr
<T
, U
>> {
562 static constexpr size_t bits
= T::bits
+ U::bits
;
567 concat_expr(T
&ms_expr
, U
&ls_expr
) : ms_expr(ms_expr
), ls_expr(ls_expr
) {}
568 concat_expr(const concat_expr
<T
, U
> &) = delete;
571 operator value
<bits
>() const {
572 value
<bits
> ms_shifted
= static_cast<const value
<T::bits
> &>(ms_expr
)
573 .template rzext
<bits
>();
574 value
<bits
> ls_extended
= static_cast<const value
<U::bits
> &>(ls_expr
)
575 .template zext
<bits
>();
576 return ms_shifted
.bit_or(ls_extended
);
580 concat_expr
<T
, U
> &operator=(const value
<bits
> &rhs
) {
581 ms_expr
= rhs
.template rtrunc
<T::bits
>();
582 ls_expr
= rhs
.template trunc
<U::bits
>();
586 // A helper that forces the cast to value<>, which allows deduction to work.
588 value
<bits
> val() const {
589 return static_cast<const value
<bits
> &>(*this);
593 // Base class for expression templates, providing helper methods for operations that are valid on both rvalues and lvalues.
595 // Note that expression objects (slices and concatenations) constructed in this way should NEVER be captured because
596 // they refer to temporaries that will, in general, only live until the end of the statement. For example, both of
597 // these snippets perform use-after-free:
599 // const auto &a = val.slice<7,0>().slice<1>();
602 // auto &&c = val.slice<7,0>().slice<1>();
605 // An easy way to write code using slices and concatenations safely is to follow two simple rules:
606 // * Never explicitly name any type except `value<W>` or `const value<W> &`.
607 // * Never use a `const auto &` or `auto &&` in any such expression.
608 // Then, any code that compiles will be well-defined.
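// Conversely (an illustrative sketch, not part of the library), this use is safe because every slice
// is consumed within the statement that creates it, following the two rules above:
//
//    value<8> val;
//    value<1> b = val.slice<7,0>().slice<1>().val(); // copy out immediately
//    val.slice<3,0>() = value<4> {0xau};             // assign through a temporary slice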
template<class T>
struct expr_base {
	template<size_t Stop, size_t Start = Stop>
	CXXRTL_ALWAYS_INLINE
	slice_expr<const T, Stop, Start> slice() const {
		return {*static_cast<const T *>(this)};
	}

	template<size_t Stop, size_t Start = Stop>
	CXXRTL_ALWAYS_INLINE
	slice_expr<T, Stop, Start> slice() {
		return {*static_cast<T *>(this)};
	}

	template<class U>
	CXXRTL_ALWAYS_INLINE
	concat_expr<const T, typename std::remove_reference<const U>::type> concat(const U &other) const {
		return {*static_cast<const T *>(this), other};
	}

	template<class U>
	CXXRTL_ALWAYS_INLINE
	concat_expr<T, typename std::remove_reference<U>::type> concat(U &&other) {
		return {*static_cast<T *>(this), other};
	}
};
template<size_t Bits>
std::ostream &operator<<(std::ostream &os, const value<Bits> &val) {
	auto old_flags = os.flags(std::ios::right);
	auto old_width = os.width(0);
	auto old_fill = os.fill('0');
	os << val.bits << '\'' << std::hex;
	for (size_t n = val.chunks - 1; n != (size_t)-1; n--) {
		if (n == val.chunks - 1 && Bits % value<Bits>::chunk::bits != 0)
			os.width((Bits % value<Bits>::chunk::bits + 3) / 4);
		else
			os.width((value<Bits>::chunk::bits + 3) / 4);
		os << val.data[n];
	}
	os.fill(old_fill);
	os.width(old_width);
	os.flags(old_flags);
	return os;
}
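// Example (an illustrative sketch, not part of the library): the output format is the width,
// a tick, then zero-padded hexadecimal chunks from most to least significant.
//
//   value<40> v;
//   v.set<uint64_t>(0x123456789aull);
//   std::cout << v << std::endl; // prints 40'123456789a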
template<size_t Bits>
struct wire {
	static constexpr size_t bits = Bits;

	value<Bits> curr;
	value<Bits> next;

	wire() = default;
	explicit constexpr wire(const value<Bits> &init) : curr(init), next(init) {}
	template<typename... Init>
	explicit constexpr wire(Init ...init) : curr{init...}, next{init...} {}
	// Copying and copy-assigning values is natural. If, however, a value is replaced with a wire,
	// e.g. because a module is built with a different optimization level, then existing code could
	// unintentionally copy a wire instead, which would create a subtle but serious bug. To make sure
	// this doesn't happen, prohibit copying and copy-assigning wires.
	wire(const wire<Bits> &) = delete;
	wire<Bits> &operator=(const wire<Bits> &) = delete;

	wire(wire<Bits> &&) = default;
	wire<Bits> &operator=(wire<Bits> &&) = default;
	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	IntegerT get() const {
		return curr.template get<IntegerT>();
	}

	template<class IntegerT>
	CXXRTL_ALWAYS_INLINE
	void set(IntegerT other) {
		next.template set<IntegerT>(other);
	}

	bool commit() {
		if (curr != next) {
			curr = next;
			return true;
		}
		return false;
	}
};
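// Example (an illustrative sketch, not part of the library): a wire is double-buffered; writes go
// to `next`, and commit() moves them into `curr` at the end of a delta cycle.
//
//   wire<8> w;
//   w.set<uint8_t>(0x5a);       // updates w.next only
//   bool changed = w.commit();  // w.curr becomes 0x5a, returns true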
template<size_t Bits>
std::ostream &operator<<(std::ostream &os, const wire<Bits> &val) {
	os << val.curr;
	return os;
}
template<size_t Width>
struct memory {
	std::vector<value<Width>> data;

	size_t depth() const {
		return data.size();
	}

	memory() = delete;
	explicit memory(size_t depth) : data(depth) {}
	memory(const memory<Width> &) = delete;
	memory<Width> &operator=(const memory<Width> &) = delete;

	memory(memory<Width> &&) = default;
	memory<Width> &operator=(memory<Width> &&) = default;
	// The only way to get the compiler to put the initializer in .rodata and not copy it on the stack is to stuff it
	// into a plain array. You'd think an std::initializer_list would work here, but it doesn't, because you can't
	// construct an initializer_list in a constexpr (or something) and so if you try to do that the whole thing is
	// first copied on the stack (probably overflowing it) and then again into `data`.
	template<size_t Size>
	struct init {
		size_t offset;
		value<Width> data[Size];
	};

	template<size_t... InitSize>
	explicit memory(size_t depth, const init<InitSize> &...init) : data(depth) {
		// This utterly reprehensible construct is the most reasonable way to apply a function to every element
		// of a parameter pack, if the elements all have different types and so cannot be cast to an initializer list.
		auto _ = {std::move(std::begin(init.data), std::end(init.data), data.begin() + init.offset)...};
		(void)_;
	}
	// An operator for direct memory reads. May be used at any time during the simulation.
	const value<Width> &operator [](size_t index) const {
		assert(index < data.size());
		return data[index];
	}

	// An operator for direct memory writes. May only be used before the simulation is started. If used
	// after the simulation is started, the design may malfunction.
	value<Width> &operator [](size_t index) {
		assert(index < data.size());
		return data[index];
	}
	// A simple way to make a writable memory would be to use an array of wires instead of an array of values.
	// However, there are two significant downsides to this approach: first, it has large overhead (2× space
	// overhead, and O(depth) time overhead during commit); second, it does not simplify handling write port
	// priorities. Although in principle write ports could be ordered or conditionally enabled in generated
	// code based on their priorities and selected addresses, the feedback arc set problem is computationally
	// expensive, and the heuristic-based algorithms are not easily modified to guarantee (rather than prefer)
	// a particular write port evaluation order.
	//
	// The approach used here instead is to queue writes into a buffer during the eval phase, then perform
	// the writes during the commit phase in the priority order. This approach has low overhead, with both space
	// and time proportional to the number of write ports. Because virtually every memory in a practical design
	// has at most two write ports, linear search is used on every write, being the fastest and simplest approach.
	struct write {
		size_t index;
		value<Width> val;
		value<Width> mask;
		int priority;
	};
	std::vector<write> write_queue;
	void update(size_t index, const value<Width> &val, const value<Width> &mask, int priority = 0) {
		assert(index < data.size());
		// Queue up the write while keeping the queue sorted by priority.
		write_queue.insert(
			std::upper_bound(write_queue.begin(), write_queue.end(), priority,
				[](const int a, const write &b) { return a < b.priority; }),
			write { index, val, mask, priority });
	}
	bool commit() {
		bool changed = false;
		for (const write &entry : write_queue) {
			value<Width> elem = data[entry.index];
			elem = elem.update(entry.val, entry.mask);
			changed |= (data[entry.index] != elem);
			data[entry.index] = elem;
		}
		write_queue.clear();
		return changed;
	}
};
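// Example (an illustrative sketch, not part of the library): two write ports hitting the same
// address in one delta cycle; writes are applied in ascending priority order, so the
// higher-priority write lands last and wins.
//
//   memory<8> mem(16);
//   mem.update(3, value<8> {0x11u}, value<8> {0xffu}, /*priority=*/0);
//   mem.update(3, value<8> {0x22u}, value<8> {0xffu}, /*priority=*/1);
//   mem.commit(); // mem[3] == 0x22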
struct metadata {
	const enum {
		MISSING = 0,
		UINT    = 1,
		SINT    = 2,
		STRING  = 3,
		DOUBLE  = 4,
	} value_type;

	// In debug mode, using the wrong .as_*() function will assert.
	// In release mode, using the wrong .as_*() function will safely return a default value.
	const unsigned    uint_value = 0;
	const signed      sint_value = 0;
	const std::string string_value = "";
	const double      double_value = 0.0;

	metadata() : value_type(MISSING) {}
	metadata(unsigned value) : value_type(UINT), uint_value(value) {}
	metadata(signed value) : value_type(SINT), sint_value(value) {}
	metadata(const std::string &value) : value_type(STRING), string_value(value) {}
	metadata(const char *value) : value_type(STRING), string_value(value) {}
	metadata(double value) : value_type(DOUBLE), double_value(value) {}

	metadata(const metadata &) = default;
	metadata &operator=(const metadata &) = delete;
	unsigned as_uint() const {
		assert(value_type == UINT);
		return uint_value;
	}

	signed as_sint() const {
		assert(value_type == SINT);
		return sint_value;
	}

	const std::string &as_string() const {
		assert(value_type == STRING);
		return string_value;
	}

	double as_double() const {
		assert(value_type == DOUBLE);
		return double_value;
	}
};

typedef std::map<std::string, metadata> metadata_map;
// Tag class to disambiguate values/wires and their aliases.
struct debug_alias {};

// Tag declaration to disambiguate values and debug outlines.
using debug_outline = ::_cxxrtl_outline;
// This structure is intended for consumption via foreign function interfaces, like Python's ctypes.
// Because of this it uses a C-style layout that is easy to parse rather than more idiomatic C++.
//
// To avoid violating strict aliasing rules, this structure has to be a subclass of the one used
// in the C API, or it would not be possible to cast between the pointers to these.
struct debug_item : ::cxxrtl_object {
	// Object types.
	enum : uint32_t {
		VALUE   = CXXRTL_VALUE,
		WIRE    = CXXRTL_WIRE,
		MEMORY  = CXXRTL_MEMORY,
		ALIAS   = CXXRTL_ALIAS,
		OUTLINE = CXXRTL_OUTLINE,
	};

	// Object flags.
	enum : uint32_t {
		INPUT  = CXXRTL_INPUT,
		OUTPUT = CXXRTL_OUTPUT,
		INOUT  = CXXRTL_INOUT,
		DRIVEN_SYNC = CXXRTL_DRIVEN_SYNC,
		DRIVEN_COMB = CXXRTL_DRIVEN_COMB,
		UNDRIVEN    = CXXRTL_UNDRIVEN,
	};

	debug_item(const ::cxxrtl_object &object) : cxxrtl_object(object) {}
	template<size_t Bits>
	debug_item(value<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
		              "value<Bits> is not compatible with C layout");
		type    = VALUE;
		flags   = flags_;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = item.data;
		next    = item.data;
		outline = nullptr;
	}
	template<size_t Bits>
	debug_item(const value<Bits> &item, size_t lsb_offset = 0) {
		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
		              "value<Bits> is not compatible with C layout");
		type    = VALUE;
		flags   = DRIVEN_COMB;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = const_cast<chunk_t *>(item.data);
		next    = nullptr;
		outline = nullptr;
	}
	template<size_t Bits>
	debug_item(wire<Bits> &item, size_t lsb_offset = 0, uint32_t flags_ = 0) {
		static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
		              sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
		              "wire<Bits> is not compatible with C layout");
		type    = WIRE;
		flags   = flags_;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = item.curr.data;
		next    = item.next.data;
		outline = nullptr;
	}
	template<size_t Width>
	debug_item(memory<Width> &item, size_t zero_offset = 0) {
		static_assert(sizeof(item.data[0]) == value<Width>::chunks * sizeof(chunk_t),
		              "memory<Width> is not compatible with C layout");
		type    = MEMORY;
		flags   = 0;
		width   = Width;
		lsb_at  = 0;
		depth   = item.data.size();
		zero_at = zero_offset;
		curr    = item.data.empty() ? nullptr : item.data[0].data;
		next    = nullptr;
		outline = nullptr;
	}
	template<size_t Bits>
	debug_item(debug_alias, const value<Bits> &item, size_t lsb_offset = 0) {
		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
		              "value<Bits> is not compatible with C layout");
		type    = ALIAS;
		flags   = DRIVEN_COMB;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = const_cast<chunk_t *>(item.data);
		next    = nullptr;
		outline = nullptr;
	}
	template<size_t Bits>
	debug_item(debug_alias, const wire<Bits> &item, size_t lsb_offset = 0) {
		static_assert(sizeof(item.curr) == value<Bits>::chunks * sizeof(chunk_t) &&
		              sizeof(item.next) == value<Bits>::chunks * sizeof(chunk_t),
		              "wire<Bits> is not compatible with C layout");
		type    = ALIAS;
		flags   = DRIVEN_COMB;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = const_cast<chunk_t *>(item.curr.data);
		next    = nullptr;
		outline = nullptr;
	}
	template<size_t Bits>
	debug_item(debug_outline &group, const value<Bits> &item, size_t lsb_offset = 0) {
		static_assert(sizeof(item) == value<Bits>::chunks * sizeof(chunk_t),
		              "value<Bits> is not compatible with C layout");
		type    = OUTLINE;
		flags   = DRIVEN_COMB;
		width   = Bits;
		lsb_at  = lsb_offset;
		depth   = 1;
		zero_at = 0;
		curr    = const_cast<chunk_t *>(item.data);
		next    = nullptr;
		outline = &group;
	}
};
static_assert(std::is_standard_layout<debug_item>::value, "debug_item is not compatible with C layout");
struct debug_items {
	std::map<std::string, std::vector<debug_item>> table;

	void add(const std::string &name, debug_item &&item) {
		std::vector<debug_item> &parts = table[name];
		parts.emplace_back(item);
		std::sort(parts.begin(), parts.end(),
			[](const debug_item &a, const debug_item &b) {
				return a.lsb_at < b.lsb_at;
			});
	}
	size_t count(const std::string &name) const {
		if (table.count(name) == 0)
			return 0;
		return table.at(name).size();
	}

	const std::vector<debug_item> &parts_at(const std::string &name) const {
		return table.at(name);
	}

	const debug_item &at(const std::string &name) const {
		const std::vector<debug_item> &parts = table.at(name);
		assert(parts.size() == 1);
		return parts.at(0);
	}

	const debug_item &operator [](const std::string &name) const {
		return at(name);
	}
};
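// Example (an illustrative sketch, not part of the library; the item name is arbitrary):
// registering a wire for introspection and reading its description back.
//
//   debug_items items;
//   wire<8> w;
//   items.add("counter", debug_item(w));
//   const debug_item &it = items.at("counter"); // it.width == 8, it.curr points at w.curr.data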
// Tag class to disambiguate module move constructor and module constructor that takes black boxes
// out of another instance of the module.
struct adopt {};

struct module {
	module() {}
	virtual ~module() {}

	// Modules with black boxes cannot be copied. Although not all designs include black boxes,
	// delete the copy constructor and copy assignment operator to make sure that any downstream
	// code that manipulates modules doesn't accidentally depend on their availability.
	module(const module &) = delete;
	module &operator=(const module &) = delete;

	module(module &&) = default;
	module &operator=(module &&) = default;

	virtual void reset() = 0;

	virtual bool eval() = 0;
	virtual bool commit() = 0;

	size_t step() {
		size_t deltas = 0;
		bool converged = false;
		do {
			converged = eval();
			deltas++;
		} while (commit() && !converged);
		return deltas;
	}

	virtual void debug_info(debug_items &items, std::string path = "") {
		(void)items, (void)path;
	}
};
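// Example (an illustrative sketch, not part of the library): a typical top-level loop driving a
// design class generated by `write_cxxrtl`; the names `p_top` and `p_clk` are hypothetical.
//
//   cxxrtl_design::p_top top;
//   top.step(); // settle the initial state
//   for (int cycle = 0; cycle < 100; cycle++) {
//     top.p_clk.set<bool>(false); top.step();
//     top.p_clk.set<bool>(true);  top.step();
//   }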
} // namespace cxxrtl

// Internal structures used to communicate with the implementation of the C interface.

typedef struct _cxxrtl_toplevel {
	std::unique_ptr<cxxrtl::module> module;
} *cxxrtl_toplevel;

typedef struct _cxxrtl_outline {
	std::function<void()> eval;
} *cxxrtl_outline;
// Definitions of internal Yosys cells. Other than the functions in this namespace, CXXRTL is fully generic
// and independent of Yosys implementation details.
//
// The `write_cxxrtl` pass translates internal cells (cells with names that start with `$`) to calls of these
// functions. All of Yosys arithmetic and logical cells perform sign or zero extension on their operands,
// whereas basic operations on arbitrary width values require operands to be of the same width. These functions
// bridge the gap by performing the necessary casts. They are named similarly to `cell_A[B]`, where A and B are `u`
// if the corresponding operand is unsigned, and `s` if it is signed.
namespace cxxrtl_yosys {

using namespace cxxrtl;

// std::max isn't constexpr until C++14 for no particular reason (it's an oversight), so we define our own.
template<class T>
CXXRTL_ALWAYS_INLINE
constexpr T max(const T &a, const T &b) {
	return a > b ? a : b;
}
// Logic operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_not(const value<BitsA> &a) {
	return value<BitsY> { a ? 0u : 1u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_and(const value<BitsA> &a, const value<BitsB> &b) {
	return value<BitsY> { (bool(a) && bool(b)) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> logic_or(const value<BitsA> &a, const value<BitsB> &b) {
	return value<BitsY> { (bool(a) || bool(b)) ? 1u : 0u };
}
// Reduction operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_and(const value<BitsA> &a) {
	return value<BitsY> { a.bit_not().is_zero() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_or(const value<BitsA> &a) {
	return value<BitsY> { a ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_xor(const value<BitsA> &a) {
	return value<BitsY> { (a.ctpop() % 2) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_xnor(const value<BitsA> &a) {
	return value<BitsY> { (a.ctpop() % 2) ? 0u : 1u };
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> reduce_bool(const value<BitsA> &a) {
	return value<BitsY> { a ? 1u : 0u };
}
// Bitwise operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> not_u(const value<BitsA> &a) {
	return a.template zcast<BitsY>().bit_not();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> not_s(const value<BitsA> &a) {
	return a.template scast<BitsY>().bit_not();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> and_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().bit_and(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> and_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().bit_and(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> or_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().bit_or(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> or_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().bit_or(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xor_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xor_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xnor_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().bit_xor(b.template zcast<BitsY>()).bit_not();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> xnor_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().bit_xor(b.template scast<BitsY>()).bit_not();
}
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shl_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().template shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shl_su(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().template shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshl_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().template shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshl_su(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().template shl(b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shr_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template shr(b).template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shr_su(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template shr(b).template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshr_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template shr(b).template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sshr_su(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template sshr(b).template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return shr_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_su(const value<BitsA> &a, const value<BitsB> &b) {
	return shr_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_us(const value<BitsA> &a, const value<BitsB> &b) {
	return b.is_neg() ? shl_uu<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shift_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return b.is_neg() ? shl_su<BitsY>(a, b.template sext<BitsB + 1>().neg()) : shr_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return shift_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_su(const value<BitsA> &a, const value<BitsB> &b) {
	return shift_su<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_us(const value<BitsA> &a, const value<BitsB> &b) {
	return shift_us<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> shiftx_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return shift_ss<BitsY>(a, b);
}
// Comparison operations
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eq_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY>{ a.template zext<BitsExt>() == b.template zext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eq_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY>{ a.template sext<BitsExt>() == b.template sext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ne_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY>{ a.template zext<BitsExt>() != b.template zext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ne_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY>{ a.template sext<BitsExt>() != b.template sext<BitsExt>() ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eqx_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return eq_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> eqx_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return eq_ss<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> nex_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return ne_uu<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> nex_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return ne_ss<BitsY>(a, b);
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> gt_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> gt_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ge_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { !a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> ge_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { !a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> lt_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { a.template zext<BitsExt>().ucmp(b.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> lt_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { a.template sext<BitsExt>().scmp(b.template sext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> le_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { !b.template zext<BitsExt>().ucmp(a.template zext<BitsExt>()) ? 1u : 0u };
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> le_ss(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsExt = max(BitsA, BitsB);
	return value<BitsY> { !b.template sext<BitsExt>().scmp(a.template sext<BitsExt>()) ? 1u : 0u };
}
// Arithmetic operations
template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> pos_u(const value<BitsA> &a) {
	return a.template zcast<BitsY>();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> pos_s(const value<BitsA> &a) {
	return a.template scast<BitsY>();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> neg_u(const value<BitsA> &a) {
	return a.template zcast<BitsY>().neg();
}

template<size_t BitsY, size_t BitsA>
CXXRTL_ALWAYS_INLINE
value<BitsY> neg_s(const value<BitsA> &a) {
	return a.template scast<BitsY>().neg();
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> add_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().add(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> add_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().add(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sub_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template zcast<BitsY>().sub(b.template zcast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> sub_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().sub(b.template scast<BitsY>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t BitsM = BitsA >= BitsB ? BitsA : BitsB;
	return a.template zcast<BitsM>().template mul<BitsY>(b.template zcast<BitsM>());
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mul_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return a.template scast<BitsY>().template mul<BitsY>(b.template scast<BitsY>());
}
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
std::pair<value<BitsY>, value<BitsY>> divmod_uu(const value<BitsA> &a, const value<BitsB> &b) {
	constexpr size_t Bits = max(BitsY, max(BitsA, BitsB));
	value<Bits> quotient;
	value<Bits> dividend = a.template zext<Bits>();
	value<Bits> divisor = b.template zext<Bits>();
	if (dividend.ucmp(divisor))
		return {/*quotient=*/value<BitsY> { 0u }, /*remainder=*/dividend.template trunc<BitsY>()};
	uint32_t divisor_shift = dividend.ctlz() - divisor.ctlz();
	divisor = divisor.shl(value<32> { divisor_shift });
	for (size_t step = 0; step <= divisor_shift; step++) {
		quotient = quotient.shl(value<1> { 1u });
		if (!dividend.ucmp(divisor)) {
			dividend = dividend.sub(divisor);
			quotient.set_bit(0, true);
		}
		divisor = divisor.shr(value<1> { 1u });
	}
	return {quotient.template trunc<BitsY>(), /*remainder=*/dividend.template trunc<BitsY>()};
}
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
std::pair<value<BitsY>, value<BitsY>> divmod_ss(const value<BitsA> &a, const value<BitsB> &b) {
	value<BitsA + 1> ua = a.template sext<BitsA + 1>();
	value<BitsB + 1> ub = b.template sext<BitsB + 1>();
	if (ua.is_neg()) ua = ua.neg();
	if (ub.is_neg()) ub = ub.neg();
	value<BitsY> y, r;
	std::tie(y, r) = divmod_uu<BitsY>(ua, ub);
	if (a.is_neg() != b.is_neg()) y = y.neg();
	if (a.is_neg()) r = r.neg();
	return {y, r};
}
template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> div_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return divmod_uu<BitsY>(a, b).first;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> div_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return divmod_ss<BitsY>(a, b).first;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mod_uu(const value<BitsA> &a, const value<BitsB> &b) {
	return divmod_uu<BitsY>(a, b).second;
}

template<size_t BitsY, size_t BitsA, size_t BitsB>
CXXRTL_ALWAYS_INLINE
value<BitsY> mod_ss(const value<BitsA> &a, const value<BitsB> &b) {
	return divmod_ss<BitsY>(a, b).second;
}
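// Example (an illustrative sketch, not part of the library): signed division truncates toward zero
// and the remainder takes the sign of the dividend, matching Verilog semantics.
//
//   value<4> a {0x9u};             // -7 as a signed 4-bit value
//   value<4> b {0x2u};             // +2
//   value<4> q = div_ss<4>(a, b);  // -3, i.e. 0xd
//   value<4> r = mod_ss<4>(a, b);  // -1, i.e. 0xf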
struct memory_index {
	bool valid;
	size_t index;

	template<size_t BitsAddr>
	memory_index(const value<BitsAddr> &addr, size_t offset, size_t depth) {
		static_assert(value<BitsAddr>::chunks <= 1, "memory address is too wide");
		size_t offset_index = addr.data[0];

		valid = (offset_index >= offset && offset_index < offset + depth);
		index = offset_index - offset;
	}
};