+ if (compmask & (1 << i)) {
+ state->vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+ loc++;
+ }
+ }
+ }
+ }
+ }
+
+ if (!binning_pass)
+ if (s[FS].instrlen)
+ fd6_emit_shader(ring, s[FS].v);
+
+ OUT_PKT4(ring, REG_A6XX_VFD_CONTROL_1, 6);
+ OUT_RING(ring, A6XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+ A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
+ 0xfcfc0000);
+ OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_2 */
+ OUT_RING(ring, 0xfcfcfcfc); /* VFD_CONTROL_3 */
+ OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */
+ OUT_RING(ring, 0x0000fcfc); /* VFD_CONTROL_5 */
+ OUT_RING(ring, 0x00000000); /* VFD_CONTROL_6 */
+
+ bool fragz = s[FS].v->no_earlyz | s[FS].v->writes_pos;
+
+ OUT_PKT4(ring, REG_A6XX_RB_DEPTH_PLANE_CNTL, 1);
+ OUT_RING(ring, COND(fragz, A6XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z));
+
+ OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1);
+ OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z));
+}
+
+/* emits the program state which is not part of the stateobj because of
+ * dependency on other gl state (rasterflat or sprite-coord-replacement)
+ */
+void
+fd6_program_emit(struct fd_ringbuffer *ring, struct fd6_emit *emit)
+{
+ const struct fd6_program_state *state = fd6_emit_get_prog(emit);
+
+ if (!unlikely(emit->rasterflat || emit->sprite_coord_enable)) {
+ /* fastpath: */
+ OUT_PKT4(ring, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8);
+ for (int i = 0; i < 8; i++)
+ OUT_RING(ring, state->vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */
+
+ OUT_PKT4(ring, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+ for (int i = 0; i < 8; i++)
+ OUT_RING(ring, 0x00000000); /* VPC_VARYING_PS_REPL[i] */
+ } else {
+ /* slow-path: */
+ struct ir3_shader_variant *fs = state->fs;
+ uint32_t vinterp[8], vpsrepl[8];
+
+ memset(vinterp, 0, sizeof(vinterp));
+ memset(vpsrepl, 0, sizeof(vpsrepl));
+
+ for (int j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) {
+
+ /* NOTE: varyings are packed, so if compmask is 0xb
+ * then first, third, and fourth component occupy
+ * three consecutive varying slots:
+ */
+ unsigned compmask = fs->inputs[j].compmask;
+
+ uint32_t inloc = fs->inputs[j].inloc;
+
+ if ((fs->inputs[j].interpolate == INTERP_MODE_FLAT) ||
+ (fs->inputs[j].rasterflat && emit->rasterflat)) {
+ uint32_t loc = inloc;
+
+ for (int i = 0; i < 4; i++) {