    if (reg->flags & IR3_REG_RELATIV) {
        components = reg->size;
        val.idummy10 = reg->array.offset;
-       max = (reg->array.offset + repeat + components - 1) >> 2;
+       max = (reg->array.offset + repeat + components - 1);
    } else {
        components = util_last_bit(reg->wrmask);
        val.comp = reg->num & 0x3;
        val.num = reg->num >> 2;
-       max = (reg->num + repeat + components - 1) >> 2;
+       max = (reg->num + repeat + components - 1);
    }
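ir3 packs a scalar register id as (num << 2) | comp, which is why val.comp and val.num are peeled off with & 0x3 and >> 2 above. After this change, max stays in those scalar-component units and the divide down to register units moves to the MAX2() sites below (note 48 becoming regid(48, 0) there). A minimal sketch of the unit change, with illustrative values of my own:

#include <assert.h>

/* sketch only: r10.z has scalar index (10 << 2) | 2 = 42; shifting right
 * by 2 recovers the full-register index that max_reg is measured in. */
static void scalar_units_example(void)
{
    unsigned scalar = (10u << 2) | 2;  /* r10.z */
    assert((scalar >> 2) == 10);       /* highest full register touched: r10 */
}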
    if (reg->flags & IR3_REG_CONST) {
-       info->max_const = MAX2(info->max_const, max);
+       info->max_const = MAX2(info->max_const, max >> 2);
    } else if (val.num == 63) {
        /* ignore writes to dummy register r63.x */
-   } else if (max < 48) {
+   } else if (max < regid(48, 0)) {
        if (reg->flags & IR3_REG_HALF) {
            if (info->gpu_id >= 600) {
                /* starting w/ a6xx, half regs conflict with full regs: */
-               info->max_reg = MAX2(info->max_reg, (max+1)/2);
+               info->max_reg = MAX2(info->max_reg, max >> 3);
            } else {
-               info->max_half_reg = MAX2(info->max_half_reg, max);
+               info->max_half_reg = MAX2(info->max_half_reg, max >> 2);
            }
        } else {
-           info->max_reg = MAX2(info->max_reg, max);
+           info->max_reg = MAX2(info->max_reg, max >> 2);
        }
    }
}
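With max in scalar units, the conversions at the MAX2() sites are >> 2 for const and full registers and >> 3 for half registers on a6xx, where half and full registers share one register file so two half regs occupy a full reg slot. A small self-contained check of that arithmetic (register choices are mine, purely illustrative):

#include <assert.h>

static void footprint_units_example(void)
{
    unsigned full_max = (5u << 2) | 3;   /* r5.w   -> scalar index 23 */
    unsigned half_max = (11u << 2) | 3;  /* hr11.w -> scalar index 47 */

    assert((full_max >> 2) == 5);   /* highest full register touched: r5 */
    assert((half_max >> 2) == 11);  /* pre-a6xx: counts against max_half_reg */
    assert((half_max >> 3) == 5);   /* a6xx: >> 3 folds hr10/hr11 into r5's slot */
}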
            struct ir3_instruction *instr = ir->outputs[(i*4) + j];
            if (instr) {
                so->outputs[i].regid = instr->regs[0]->num;
+               so->outputs[i].half = !!(instr->regs[0]->flags & IR3_REG_HALF);
                break;
            }
        }
    /* Note that some or all channels of an input may be unused: */
    for (i = 0; i < so->inputs_count; i++) {
        unsigned j, reg = regid(63,0);
+       bool half = false;
        for (j = 0; j < 4; j++) {
            struct ir3_instruction *in = inputs[(i*4) + j];
            if (in && !(in->flags & IR3_INSTR_UNUSED)) {
                reg = in->regs[0]->num - j;
+               if (half) {
+                   compile_assert(ctx, in->regs[0]->flags & IR3_REG_HALF);
+               } else {
+                   half = !!(in->regs[0]->flags & IR3_REG_HALF);
+               }
            }
        }
        so->inputs[i].regid = reg;
+       so->inputs[i].half = half;
    }
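The loop records one precision per input; compile_assert() guards the assumption that every used channel of an input lives in the same register file, since the base regid computed from num - j would otherwise mix half and full units. A standalone sketch of that invariant with a hypothetical channel layout (nothing below is from the patch):

#include <assert.h>
#include <stdbool.h>

/* sketch: once any channel of an input is known to be half precision,
 * every later channel must agree (mirrors the compile_assert above) */
static bool channels_consistent(const bool *chan_is_half, unsigned nchan)
{
    bool half = false;
    for (unsigned j = 0; j < nchan; j++) {
        if (half && !chan_is_half[j])
            return false;              /* mixed precision in one input */
        half = half || chan_is_half[j];
    }
    return true;
}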
if (ctx->astc_srgb)
* the reg off.
*/
static void
-fixup_regfootprint(struct ir3_shader_variant *v)
+fixup_regfootprint(struct ir3_shader_variant *v, uint32_t gpu_id)
{
    unsigned i;
        if (v->inputs[i].compmask) {
            unsigned n = util_last_bit(v->inputs[i].compmask) - 1;
-           int32_t regid = (v->inputs[i].regid + n) >> 2;
-           v->info.max_reg = MAX2(v->info.max_reg, regid);
+           int32_t regid = v->inputs[i].regid + n;
+           if (v->inputs[i].half) {
+               if (gpu_id < 500) {
+                   v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+               } else {
+                   v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+               }
+           } else {
+               v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+           }
        }
    }
    for (i = 0; i < v->outputs_count; i++) {
-       int32_t regid = (v->outputs[i].regid + 3) >> 2;
-       v->info.max_reg = MAX2(v->info.max_reg, regid);
+       int32_t regid = v->outputs[i].regid + 3;
+       if (v->outputs[i].half) {
+           if (gpu_id < 500) {
+               v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);
+           } else {
+               v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);
+           }
+       } else {
+           v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);
+       }
    }
}
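fixup_regfootprint() now takes the gpu_id so it can decide where a half-precision input or output is charged: one branch adds it to the separate max_half_reg (still >> 2, half-register units), the other folds it into max_reg with the same >> 3 half-to-full conversion seen earlier. A worked example for one half-precision output, with values I picked for illustration:

#include <assert.h>

/* illustrative: an output based at hr3.x covers up to scalar index
 * (3 << 2) + 3 = 15, matching the "regid + 3" above */
static void half_output_footprint_example(void)
{
    unsigned rid = (3u << 2) + 3;
    assert((rid >> 2) == 3);  /* separate half file: max_half_reg >= 3 (hr3) */
    assert((rid >> 3) == 1);  /* merged file: hr2/hr3 fold into r1, max_reg >= 1 */
}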
*/
    v->constlen = MIN2(255, MAX2(v->constlen, v->info.max_const + 1));
-   fixup_regfootprint(v);
+   fixup_regfootprint(v, gpu_id);
    return bin;
}
    struct {
        uint8_t slot;
        uint8_t regid;
+       bool half : 1;
    } outputs[16 + 2]; /* +POSITION +PSIZE */
    bool writes_pos, writes_smask, writes_psize;
        /* fragment shader specific: */
        bool bary : 1; /* fetched varying (vs one loaded into reg) */
        bool rasterflat : 1; /* special handling for emit->rasterflat */
+       bool half : 1;
        enum glsl_interp_mode interpolate;
    } inputs[16 + 2]; /* +POSITION +FACE */
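The regid in these entries keeps the packed scalar form (register number in the upper bits, component in the low two), so the new one-bit half flag is what tells a consumer whether that id names an hr or r register. A hypothetical, trimmed-down reader of such an entry, just to show the intent (example_input and print_input are my names, not part of the variant API):

#include <stdbool.h>
#include <stdio.h>

/* hypothetical mirror of one inputs[]/outputs[] entry */
struct example_input {
    unsigned regid;    /* packed (num << 2) | comp */
    bool half : 1;     /* half (hr) vs full (r) register file */
};

static void print_input(const struct example_input *in)
{
    printf("%s%u.%c\n", in->half ? "hr" : "r",
           in->regid >> 2, "xyzw"[in->regid & 0x3]);
}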