const struct v3d_device_info *devinfo = state->devinfo;
struct qinst *qinst = n->inst;
struct v3d_qpu_instr *inst = &qinst->qpu;
+ /* If the input and output segments are shared, then all VPM reads to
+ * a location need to happen before all writes. We handle this by
+ * serializing all VPM operations for now.
+ */
+ bool separate_vpm_segment = false;
if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
if (inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS)
add_write_dep(state, &state->last_vpm, n);
break;
+ case V3D_QPU_A_LDVPMV_IN:
+ case V3D_QPU_A_LDVPMD_IN:
+ case V3D_QPU_A_LDVPMG_IN:
+ case V3D_QPU_A_LDVPMP:
+ if (!separate_vpm_segment)
+ add_write_dep(state, &state->last_vpm, n);
+ break;
+
case V3D_QPU_A_VPMWT:
add_read_dep(state, state->last_vpm, n);
break;
if (inst->sig.ldtlb | inst->sig.ldtlbu)
add_read_dep(state, state->last_tlb, n);
- if (inst->sig.ldvpm)
+ if (inst->sig.ldvpm) {
add_write_dep(state, &state->last_vpm_read, n);
+ /* At least for now, we're doing shared I/O segments, so queue
+ * all writes after all reads.
+ */
+ if (!separate_vpm_segment)
+ add_write_dep(state, &state->last_vpm, n);
+ }
+
/* inst->sig.ldunif or sideband uniform read */
if (qinst->uniform != ~0)
add_write_dep(state, &state->last_unif, n);
/* Total number of components written, for the shader state record. */
uint32_t vpm_output_size;
+ /* Set if there should be separate VPM segments for input and output.
+ * If unset, vpm_input_size will be 0.
+ */
+ bool separate_segments;
+
/* Value to be programmed in VCM_CACHE_SIZE. */
uint8_t vcm_cache_size;
};
prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
+ /* Set us up for shared input/output segments. This is apparently
+ * necessary for our VCM setup to avoid corruption of varyings.
+ */
+ prog_data->separate_segments = false;
+ prog_data->vpm_output_size = MAX2(prog_data->vpm_output_size,
+ prog_data->vpm_input_size);
+ prog_data->vpm_input_size = 0;
+
/* Compute VCM cache size. We set up our program to take up less than
* half of the VPM, so that any set of bin and render programs won't
* run out of space. We need space for at least one input segment,
/* XXX: Use combined input/output size flag in the common
* case.
*/
- shader.coordinate_shader_has_separate_input_and_output_vpm_blocks = true;
- shader.vertex_shader_has_separate_input_and_output_vpm_blocks = true;
+ shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
+ v3d->prog.cs->prog_data.vs->separate_segments;
+ shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
+ v3d->prog.vs->prog_data.vs->separate_segments;
+
shader.coordinate_shader_input_vpm_segment_size =
- MAX2(v3d->prog.cs->prog_data.vs->vpm_input_size, 1);
+ v3d->prog.cs->prog_data.vs->vpm_input_size;
shader.vertex_shader_input_vpm_segment_size =
- MAX2(v3d->prog.vs->prog_data.vs->vpm_input_size, 1);
+ v3d->prog.vs->prog_data.vs->vpm_input_size;
shader.coordinate_shader_output_vpm_segment_size =
v3d->prog.cs->prog_data.vs->vpm_output_size;