- tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9801, 1);
- tu_cs_emit(cs,
- hs_info->tess.tcs_vertices_out * vs->output_size / 4);
+ tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
+ tu_cs_emit(cs, patch_control_points * vs->output_size / 4);
+
+ /* for A650 this value seems to be local memory size per wave */
+ if (vshs_workgroup) {
+ const uint32_t wavesize = 64;
+ /* note: if HS is really just the VS extended, then this
+ * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
+ * however that doesn't match the blob, and fails some dEQP tests.
+ */
+ uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
+ uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
+ unknown_a831 = DIV_ROUND_UP(total_size, wavesize);
+ }