uint32_t repeat, uint32_t valid_flags)
{
struct ir3_shader_variant *v = info->data;
- bool mergedregs = v->shader->compiler->gpu_id >= 600;
reg_t val = { .dummy32 = 0 };
if (reg->flags & ~valid_flags) {
/* ignore writes to dummy register r63.x */
} else if (max < regid(48, 0)) {
if (reg->flags & IR3_REG_HALF) {
- if (mergedregs) {
+ if (v->mergedregs) {
/* starting w/ a6xx, half regs conflict with full regs: */
info->max_reg = MAX2(info->max_reg, max >> 3);
} else {
struct ir3_legalize_state *state = &bd->state;
bool last_input_needs_ss = false;
bool has_tex_prefetch = false;
- bool mergedregs = ctx->compiler->gpu_id >= 600;
+ bool mergedregs = ctx->so->mergedregs;
/* our input state is the OR of all predecessor blocks' state: */
set_foreach(block->predecessors, entry) {
ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
{
struct ir3_legalize_ctx *ctx = rzalloc(ir, struct ir3_legalize_ctx);
- bool mergedregs = ctx->compiler->gpu_id >= 600;
+ bool mergedregs = so->mergedregs;
bool progress;
ctx->so = so;
struct ir3_postsched_deps_state state = {
.ctx = ctx,
.direction = F,
- .merged = ctx->ir->compiler->gpu_id >= 600,
+ .merged = ctx->v->mergedregs,
};
foreach_instr (instr, &ctx->unscheduled_list) {
struct ir3_postsched_deps_state state = {
.ctx = ctx,
.direction = R,
- .merged = ctx->ir->compiler->gpu_id >= 600,
+ .merged = ctx->v->mergedregs,
};
foreach_instr_rev (instr, &ctx->unscheduled_list) {
ctx->use = rzalloc_array(ctx->g, unsigned, ctx->alloc_count);
/* TODO add selector callback for split (pre-a6xx) register file: */
- if (ctx->ir->compiler->gpu_id >= 600) {
+ if (ctx->v->mergedregs) {
ra_set_select_reg_callback(ctx->g, ra_select_reg_merged, ctx);
if (ctx->scalar_pass) {
struct ir3_ra_ctx ctx = {
.v = v,
.ir = v->ir,
- .set = (v->ir->compiler->gpu_id >= 600) ?
+ .set = v->mergedregs ?
v->ir->compiler->mergedregs_set : v->ir->compiler->set,
.scalar_pass = scalar_pass,
};
* the reg off.
*/
static void
-fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
+fixup_regfootprint(struct ir3_shader_variant *v)
{
unsigned i;
unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
int32_t regid = v->inputs[i].regid + n;
if (v->inputs[i].half) {
- if (gpu_id < 500) {
+ if (!v->mergedregs) {
v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
continue;
int32_t regid = v->outputs[i].regid + 3;
if (v->outputs[i].half) {
- if (gpu_id < 500) {
+ if (!v->mergedregs) {
v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;
int32_t regid = v->sampler_prefetch[i].dst + n;
if (v->sampler_prefetch[i].half_precision) {
- if (gpu_id < 500) {
+ if (!v->mergedregs) {
v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
} else {
v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
*/
v->constlen = MAX2(v->constlen, v->info.max_const + 1);
- fixup_regfootprint(v, gpu_id);
+ fixup_regfootprint(v);
return bin;
}
v->key = *key;
v->type = shader->type;
+ if (shader->compiler->gpu_id >= 600) {
+ switch (v->type) {
+ case MESA_SHADER_TESS_CTRL:
+ case MESA_SHADER_TESS_EVAL:
+ v->mergedregs = false;
+ break;
+ case MESA_SHADER_VERTEX:
+ case MESA_SHADER_GEOMETRY:
+ /* For VS/GS, normally use mergedregs, but when tessellation
+ * is enabled we must not use MERGEDREGS
+ */
+ v->mergedregs = !key->tessellation;
+ break;
+ default:
+ v->mergedregs = true;
+ }
+ } else {
+ v->mergedregs = false;
+ }
+
ret = ir3_compile_shader_nir(shader->compiler, v);
if (ret) {
debug_error("compile failed!");
bool per_samp;
+ /* Are we using split or merged register file? */
+ bool mergedregs;
+
/* for astc srgb workaround, the number/base of additional
* alpha tex states we need, and index of original tex states
*/