/* register assignment: */
struct ir3_ra_reg_set * ir3_ra_alloc_reg_set(struct ir3_compiler *compiler);
-int ir3_ra(struct ir3 *ir3);
+int ir3_ra(struct ir3_shader_variant *v);
/* legalize: */
void ir3_legalize(struct ir3 *ir, bool *has_ssbo, bool *need_pixlod, int *max_bary);
if (so->binning_pass && (ctx->compiler->gpu_id >= 600))
fixup_binning_pass(ctx);
+ /* for a6xx+, binning and draw pass VS use same VBO state, so we
+ * need to make sure not to remove any inputs that are used by
+ * the nonbinning VS.
+ */
+ if (ctx->compiler->gpu_id >= 600 && so->binning_pass) {
+ debug_assert(so->type == MESA_SHADER_VERTEX);
+ for (int i = 0; i < ir->ninputs; i++) {
+ struct ir3_instruction *in = ir->inputs[i];
+
+ if (!in)
+ continue;
+
+ unsigned n = i / 4;
+ unsigned c = i % 4;
+
+ debug_assert(n < so->nonbinning->inputs_count);
+
+ if (so->nonbinning->inputs[n].sysval)
+ continue;
+
+ /* be sure to keep inputs, even if only used in VS */
+ if (so->nonbinning->inputs[n].compmask & (1 << c))
+ array_insert(in->block, in->block->keeps, in);
+ }
+ }
+
/* Insert mov if there's same instruction for each output.
* eg. dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_expression.vertex.sampler2dshadow
*/
ir3_print(ir);
}
- ret = ir3_ra(ir);
+ ret = ir3_ra(so);
if (ret) {
DBG("RA failed!");
goto out;
for (j = 0; j < 4; j++) {
struct ir3_instruction *in = inputs[(i*4) + j];
- if (in && !(in->flags & IR3_INSTR_UNUSED)) {
- reg = in->regs[0]->num - j;
- if (half) {
- compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
- } else {
- half = !!(in->regs[0]->flags & IR3_REG_HALF);
- }
+ if (!in)
+ continue;
+
+ if (in->flags & IR3_INSTR_UNUSED)
+ continue;
+
+ reg = in->regs[0]->num - j;
+ if (half) {
+ compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+ } else {
+ half = !!(in->regs[0]->flags & IR3_REG_HALF);
}
}
so->inputs[i].regid = reg;
/* register-assign context, per-shader */
struct ir3_ra_ctx {
+ struct ir3_shader_variant *v;
struct ir3 *ir;
struct ir3_ra_reg_set *set;
static int
ra_alloc(struct ir3_ra_ctx *ctx)
{
+ /* Pre-assign VS inputs on a6xx+ binning pass shader, to align
+ * with draw pass VS, so binning and draw pass can both use the
+ * same VBO state.
+ *
+ * Note that VS inputs are expected to be full precision.
+ */
+ bool pre_assign_inputs = (ctx->ir->compiler->gpu_id >= 600) &&
+ (ctx->ir->type == MESA_SHADER_VERTEX) &&
+ ctx->v->binning_pass;
+
+ if (pre_assign_inputs) {
+ for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+ struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+ if (!instr)
+ continue;
+
+ debug_assert(!(instr->regs[0]->flags & (IR3_REG_HALF | IR3_REG_HIGH)));
+
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+ /* only consider the first component: */
+ if (id->off > 0)
+ continue;
+
+ unsigned name = ra_name(ctx, id);
+
+ unsigned n = i / 4;
+ unsigned c = i % 4;
+
+		 * 'base' is in scalar (class 0) but we need to map that to
+		 * the conflicting register of the appropriate class (ie.
+ * input could be vec2/vec3/etc)
+ *
+ * Note that the higher class (larger than scalar) regs
+ * are setup to conflict with others in the same class,
+ * so for example, R1 (scalar) is also the first component
+ * of D1 (vec2/double):
+ *
+ * Single (base) | Double
+ * --------------+---------------
+ * R0 | D0
+ * R1 | D0 D1
+ * R2 | D1 D2
+ * R3 | D2
+ * .. and so on..
+ */
+ unsigned reg = ctx->set->gpr_to_ra_reg[id->cls]
+ [ctx->v->nonbinning->inputs[n].regid + c];
+
+ ra_set_node_reg(ctx->g, name, reg);
+ }
+ }
+
/* pre-assign array elements:
*/
list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
}
}
+ /* also need to not conflict with any pre-assigned inputs: */
+ if (pre_assign_inputs) {
+ for (unsigned i = 0; i < ctx->ir->ninputs; i++) {
+ struct ir3_instruction *instr = ctx->ir->inputs[i];
+
+ if (!instr)
+ continue;
+
+ struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
+
+ /* only consider the first component: */
+ if (id->off > 0)
+ continue;
+
+ unsigned name = ra_name(ctx, id);
+
+ /* Check if array intersects with liverange AND register
+ * range of the input:
+ */
+ if (intersects(arr->start_ip, arr->end_ip,
+ ctx->def[name], ctx->use[name]) &&
+ intersects(base, base + arr->length,
+ i, i + class_sizes[id->cls])) {
+ base = MAX2(base, i + class_sizes[id->cls]);
+ goto retry;
+ }
+ }
+ }
+
arr->reg = base;
for (unsigned i = 0; i < arr->length; i++) {
return 0;
}
-int ir3_ra(struct ir3 *ir)
+int ir3_ra(struct ir3_shader_variant *v)
{
struct ir3_ra_ctx ctx = {
- .ir = ir,
- .set = ir->compiler->set,
+ .v = v,
+ .ir = v->ir,
+ .set = v->ir->compiler->set,
};
int ret;
v->ir = NULL;
}
+/*
+ * For creating normal shader variants, 'nonbinning' is NULL.  For
+ * creating a binning pass shader, it is a link to the corresponding
+ * normal (non-binning) variant.
+ */
static struct ir3_shader_variant *
create_variant(struct ir3_shader *shader, struct ir3_shader_key *key,
- bool binning_pass)
+ struct ir3_shader_variant *nonbinning)
{
struct ir3_shader_variant *v = CALLOC_STRUCT(ir3_shader_variant);
int ret;
v->id = ++shader->variant_count;
v->shader = shader;
- v->binning_pass = binning_pass;
+ v->binning_pass = !!nonbinning;
+ v->nonbinning = nonbinning;
v->key = *key;
v->type = shader->type;
return v;
/* compile new variant if it doesn't exist already: */
- v = create_variant(shader, key, false);
+ v = create_variant(shader, key, NULL);
if (v) {
v->next = shader->variants;
shader->variants = v;
if (v && binning_pass) {
if (!v->binning) {
- v->binning = create_variant(shader, key, true);
+ v->binning = create_variant(shader, key, v);
*created = true;
}
mtx_unlock(&shader->variants_lock);
* which is pointed to by so->binning:
*/
bool binning_pass;
- struct ir3_shader_variant *binning;
+// union {
+ struct ir3_shader_variant *binning;
+ struct ir3_shader_variant *nonbinning;
+// };
struct ir3_info info;
struct ir3 *ir;
struct fd_ringbuffer *state;
state = build_vbo_state(emit, emit->vs);
- fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x6);
-
- state = build_vbo_state(emit, emit->bs);
- fd6_emit_take_group(emit, state, FD6_GROUP_VBO_BINNING, 0x1);
+ fd6_emit_take_group(emit, state, FD6_GROUP_VBO, 0x7);
}
if (dirty & FD_DIRTY_ZSA) {
FD6_GROUP_LRZ,
FD6_GROUP_LRZ_BINNING,
FD6_GROUP_VBO,
- FD6_GROUP_VBO_BINNING,
FD6_GROUP_VS_CONST,
FD6_GROUP_FS_CONST,
FD6_GROUP_VS_TEX,
state->binning_stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
state->stateobj = fd_ringbuffer_new_object(ctx->pipe, 0x1000);
+#ifdef DEBUG
+ for (unsigned i = 0; i < bs->inputs_count; i++) {
+ if (vs->inputs[i].sysval)
+ continue;
+ debug_assert(bs->inputs[i].regid == vs->inputs[i].regid);
+ }
+#endif
+
setup_config_stateobj(state->config_stateobj, state);
setup_stateobj(state->binning_stateobj, ctx->screen, state, key, true);
setup_stateobj(state->stateobj, ctx->screen, state, key, false);