} else if (slot == VARYING_SLOT_COL1) {
slot = VARYING_SLOT_BFC1;
} else {
- return 0;
+ return -1;
}
for (j = 0; j < so->outputs_count; j++)
if (so->outputs[j].slot == slot)
return j;
debug_assert(0);
- return 0;
+ return -1;
}
static inline int
}
struct ir3_shader_linkage {
+ /* Maximum location either consumed by the fragment shader or produced by
+ * the last geometry stage, i.e. the size required for each vertex in the
+ * VPC in DWORDs.
+ */
uint8_t max_loc;
+
+ /* Number of entries in var. */
uint8_t cnt;
+
+ /* Bitset of locations used, including ones which are only used by the FS.
+ */
+ uint32_t varmask[4];
+
+ /* Map from VS output to location. */
struct {
uint8_t regid;
uint8_t compmask;
uint8_t loc;
} var[32];
+
+ /* Location for the fixed-function gl_PrimitiveID passthrough, or 0xff if unused. */
+ uint8_t primid_loc;
};
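+
+ /* For example: adding a varying with compmask 0x7 at loc 4 (via
+ * ir3_link_add below) sets bits 4..6 of varmask[0] and raises max_loc to
+ * 7; passing regid(63, 0) reserves the locations without adding a var[]
+ * entry.
+ */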
static inline void
-ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid, uint8_t compmask, uint8_t loc)
+ir3_link_add(struct ir3_shader_linkage *l, uint8_t regid_, uint8_t compmask, uint8_t loc)
{
- int i = l->cnt++;
- debug_assert(i < ARRAY_SIZE(l->var));
- l->var[i].regid = regid;
- l->var[i].compmask = compmask;
- l->var[i].loc = loc;
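+ /* Record each component location covered by this varying as used; the
+ * accumulated varmask is what later gets inverted into VPC_VAR_DISABLE.
+ */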
+ for (int j = 0; j < util_last_bit(compmask); j++) {
+ uint8_t comploc = loc + j;
+ l->varmask[comploc / 32] |= 1 << (comploc % 32);
+ }
+
l->max_loc = MAX2(l->max_loc, loc + util_last_bit(compmask));
+
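+ /* regid(63, 0) marks a location the FS reads but the last geometry stage
+ * doesn't write (e.g. gl_PointCoord): it still counts towards varmask and
+ * max_loc above, but gets no entry in the output map.
+ */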
+ if (regid_ != regid(63, 0)) {
+ int i = l->cnt++;
+ debug_assert(i < ARRAY_SIZE(l->var));
+
+ l->var[i].regid = regid_;
+ l->var[i].compmask = compmask;
+ l->var[i].loc = loc;
+ }
}
static inline void
ir3_link_shaders(struct ir3_shader_linkage *l,
const struct ir3_shader_variant *vs,
- const struct ir3_shader_variant *fs)
+ const struct ir3_shader_variant *fs,
+ bool pack_vs_out)
{
+ /* On older platforms, varmask isn't programmed at all, and it appears
+ * that the hardware generates a mask of used VPC locations using the VS
+ * output map, and hangs if an FS bary instruction references a location
+ * not in the list. This means that we need a dummy entry in the VS out
+ * map for things like gl_PointCoord which aren't written by the VS.
+ * Furthermore, we can't use r63.x there, so just pick an arbitrary
+ * register (r0.x) to use when there is no VS output.
+ */
+ const unsigned default_regid = pack_vs_out ? regid(63, 0) : regid(0, 0);
int j = -1, k;
+ l->primid_loc = 0xff;
+
while (l->cnt < ARRAY_SIZE(l->var)) {
j = ir3_next_varying(fs, j);
k = ir3_find_output(vs, fs->inputs[j].slot);
- ir3_link_add(l, vs->outputs[k].regid,
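+ /* gl_PrimitiveID may come from the fixed-function passthrough rather than
+ * a VS/GS output, so just remember its FS location.
+ */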
+ if (k < 0 && fs->inputs[j].slot == VARYING_SLOT_PRIMITIVE_ID) {
+ l->primid_loc = fs->inputs[j].inloc;
+ }
+
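+ /* No matching output (k < 0): link a placeholder register so the
+ * location is still reserved for the FS input.
+ */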
+ ir3_link_add(l, k >= 0 ? vs->outputs[k].regid : default_regid,
fs->inputs[j].compmask, fs->inputs[j].inloc);
}
}
bool has_gs = gs->type != MESA_SHADER_NONE;
const struct ir3_shader_variant *last_shader = has_gs ? gs : vs;
struct ir3_shader_linkage linkage = { 0 };
- ir3_link_shaders(&linkage, last_shader, fs);
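+ /* pack_vs_out=true: a6xx programs VPC_VAR_DISABLE from varmask below, so
+ * r63.x placeholders are allowed for FS inputs with no VS/GS output.
+ */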
+ ir3_link_shaders(&linkage, last_shader, fs, true);
if (last_shader->shader->stream_output.num_outputs)
tu6_link_streamout(&linkage, last_shader);
- BITSET_DECLARE(vpc_var_enables, 128) = { 0 };
- for (uint32_t i = 0; i < linkage.cnt; i++) {
- const uint32_t comp_count = util_last_bit(linkage.var[i].compmask);
- for (uint32_t j = 0; j < comp_count; j++)
- BITSET_SET(vpc_var_enables, linkage.var[i].loc + j);
- }
-
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4);
- tu_cs_emit(cs, ~vpc_var_enables[0]);
- tu_cs_emit(cs, ~vpc_var_enables[1]);
- tu_cs_emit(cs, ~vpc_var_enables[2]);
- tu_cs_emit(cs, ~vpc_var_enables[3]);
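+ /* varmask has a bit set for each used location; invert it to get the
+ * per-location DISABLE bits.
+ */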
+ tu_cs_emit(cs, ~linkage.varmask[0]);
+ tu_cs_emit(cs, ~linkage.varmask[1]);
+ tu_cs_emit(cs, ~linkage.varmask[2]);
+ tu_cs_emit(cs, ~linkage.varmask[3]);
/* a6xx finds position/pointsize at the end */
const uint32_t position_regid =
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));
struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, vp, fp);
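+ /* pack_vs_out=false: older parts don't program the varmask (see the
+ * comment in ir3_link_shaders), so unmatched FS inputs need a real
+ * register instead of r63.x.
+ */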
+ ir3_link_shaders(&l, vp, fp, false);
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
uint32_t reg = 0;
A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in));
struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, s[VS].v, s[FS].v);
+ ir3_link_shaders(&l, s[VS].v, s[FS].v, false);
for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
uint32_t reg = 0;
COND(s[VS].v->num_samp > 0, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
struct ir3_shader_linkage l = {0};
- ir3_link_shaders(&l, s[VS].v, s[FS].v);
+ ir3_link_shaders(&l, s[VS].v, s[FS].v, true);
if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
!emit->binning_pass)
link_stream_out(&l, s[VS].v);
- BITSET_DECLARE(varbs, 128) = {0};
- uint32_t *varmask = (uint32_t *)varbs;
-
- for (i = 0; i < l.cnt; i++)
- for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
- BITSET_SET(varbs, l.var[i].loc + j);
-
OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
- OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */
- OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */
- OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */
- OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */
+ OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
+ OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
+ OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
+ OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
/* a5xx appends pos/psize to end of the linkage map: */
if (pos_regid != regid(63,0))
struct ir3_shader_linkage l = {0};
const struct ir3_shader_variant *last_shader = fd6_last_shader(state);
- ir3_link_shaders(&l, last_shader, fs);
-
- BITSET_DECLARE(varbs, 128) = {0};
- uint32_t *varmask = (uint32_t *)varbs;
-
- for (i = 0; i < l.cnt; i++)
- for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
- BITSET_SET(varbs, l.var[i].loc + j);
+ ir3_link_shaders(&l, last_shader, fs, true);
OUT_PKT4(ring, REG_A6XX_VPC_VAR_DISABLE(0), 4);
- OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */
- OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */
- OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */
- OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */
+ OUT_RING(ring, ~l.varmask[0]); /* VPC_VAR[0].DISABLE */
+ OUT_RING(ring, ~l.varmask[1]); /* VPC_VAR[1].DISABLE */
+ OUT_RING(ring, ~l.varmask[2]); /* VPC_VAR[2].DISABLE */
+ OUT_RING(ring, ~l.varmask[3]); /* VPC_VAR[3].DISABLE */
/* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */
if (last_shader->shader->stream_output.num_outputs > 0)