* Note the size field isn't important for relative const (since
* we don't have to do register allocation for constants).
*/
- unsigned size : 15;
-
- bool merged : 1; /* half-regs conflict with full regs (ie >= a6xx) */
+ unsigned size : 16;
/* normal registers:
* the component is in the low two bits of the reg #, so
typedef BITSET_DECLARE(regmaskstate_t, 2 * MAX_REG);
typedef struct {
+ /* Selects which tracking scheme __regmask_get()/__regmask_set() use:
+  * when true they take the merged-register-file (a6xx+) path.  Set by
+  * regmask_init() and checked for consistency by regmask_or().
+  */
+ bool mergedregs;
regmaskstate_t mask;
} regmask_t;
static inline bool
__regmask_get(regmask_t *regmask, struct ir3_register *reg, unsigned n)
{
- if (reg->merged) {
+ if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full precisions register
* using two half-precision slots:
static inline void
__regmask_set(regmask_t *regmask, struct ir3_register *reg, unsigned n)
{
- if (reg->merged) {
+ if (regmask->mergedregs) {
/* a6xx+ case, with merged register file, we track things in terms
* of half-precision registers, with a full precisions register
* using two half-precision slots:
}
}
-static inline void regmask_init(regmask_t *regmask)
+/**
+ * Reset a regmask to the empty state.
+ *
+ * Clears every bit in regmask->mask and records in regmask->mergedregs
+ * which register-file scheme subsequent get/set calls should use.
+ *
+ * @param regmask     the mask to (re)initialize
+ * @param mergedregs  value stored into regmask->mergedregs
+ */
+static inline void regmask_init(regmask_t *regmask, bool mergedregs)
{
- memset(regmask, 0, sizeof(*regmask));
+ /* only the bitset is zeroed; the mergedregs flag is assigned explicitly */
+ memset(&regmask->mask, 0, sizeof(regmask->mask));
+ regmask->mergedregs = mergedregs;
}
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
+/* Store the bitwise OR of the masks of 'a' and 'b' into dst->mask, one
+ * array element at a time.
+ */
static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
+ /* dst->mergedregs is only read here, never written, so dst must have
+  * been initialized with the same mergedregs setting as both inputs.
+  */
+ assert(dst->mergedregs == a->mergedregs);
+ assert(dst->mergedregs == b->mergedregs);
+
for (unsigned i = 0; i < ARRAY_SIZE(dst->mask); i++)
dst->mask[i] = a->mask[i] | b->mask[i];
}
struct ir3_legalize_state *state = &bd->state;
bool last_input_needs_ss = false;
bool has_tex_prefetch = false;
+ bool mergedregs = ctx->compiler->gpu_id >= 600;
/* our input state is the OR of all predecessor blocks' state: */
set_foreach(block->predecessors, entry) {
if (last_n && is_barrier(last_n)) {
n->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
- regmask_init(&state->needs_sy);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
+ regmask_init(&state->needs_sy, mergedregs);
}
if (last_n && (last_n->opc == OPC_PREDT)) {
n->flags |= IR3_INSTR_SS;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
/* NOTE: consider dst register too.. it could happen that
if (regmask_get(&state->needs_ss, reg)) {
n->flags |= IR3_INSTR_SS;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
if (regmask_get(&state->needs_sy, reg)) {
n->flags |= IR3_INSTR_SY;
- regmask_init(&state->needs_sy);
+ regmask_init(&state->needs_sy, mergedregs);
}
}
if (regmask_get(&state->needs_ss_war, reg)) {
n->flags |= IR3_INSTR_SS;
last_input_needs_ss = false;
- regmask_init(&state->needs_ss_war);
- regmask_init(&state->needs_ss);
+ regmask_init(&state->needs_ss_war, mergedregs);
+ regmask_init(&state->needs_ss, mergedregs);
}
if (last_rel && (reg->num == regid(REG_A0, 0))) {
ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
{
struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
+ bool mergedregs = ctx->compiler->gpu_id >= 600;
bool progress;
ctx->so = so;
/* allocate per-block data: */
foreach_block (block, &ir->block_list) {
- block->data = rzalloc(ctx, struct ir3_legalize_block_data);
+ struct ir3_legalize_block_data *bd =
+ rzalloc(ctx, struct ir3_legalize_block_data);
+
+ regmask_init(&bd->state.needs_ss_war, mergedregs);
+ regmask_init(&bd->state.needs_ss, mergedregs);
+ regmask_init(&bd->state.needs_sy, mergedregs);
+
+ block->data = bd;
}
ir3_remove_nops(ir);