const struct ir3_const_state *const_state = ir3_const_state(xs);
uint32_t base = const_state->offsets.immediate;
- int size = const_state->immediates_count;
+ int size = DIV_ROUND_UP(const_state->immediates_count, 4);
/* truncate size to avoid writing constants that shader
* does not use:
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
- for (unsigned i = 0; i < size; i++) {
- tu_cs_emit(cs, const_state->immediates[i].val[0]);
- tu_cs_emit(cs, const_state->immediates[i].val[1]);
- tu_cs_emit(cs, const_state->immediates[i].val[2]);
- tu_cs_emit(cs, const_state->immediates[i].val[3]);
- }
+ tu_cs_emit_array(cs, const_state->immediates, size * 4);
}
static void
const struct ir3_shader_variant *hs,
const struct ir3_shader_variant *ds,
const struct ir3_shader_variant *gs,
- const struct ir3_shader_variant *fs)
+ const struct ir3_shader_variant *fs,
+ uint32_t patch_control_points,
+ bool vshs_workgroup)
{
/* note: doesn't compile as static because of the array regs.. */
const struct reg_config {
tu6_setup_streamout(cs, last_shader, &linkage);
+ /* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
+ * at least when a DS is the last stage, so add a dummy output to keep it
+ * happy if there aren't any. We do this late in order to avoid emitting
+ * any unused code and make sure that optimizations don't remove it.
+ */
+ if (linkage.cnt == 0)
+ ir3_link_add(&linkage, 0, 0x1, linkage.max_loc);
+
/* map outputs of the last shader to VPC */
assert(linkage.cnt <= 32);
const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs ? fs->total_in : 0) |
COND(fs && fs->total_in, A6XX_VPC_CNTL_0_VARYING) |
A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) |
- A6XX_VPC_CNTL_0_UNKLOC(0xff));
+ A6XX_VPC_CNTL_0_VIEWIDLOC(0xff));
if (hs) {
shader_info *hs_info = &hs->shader->nir->info;
+ uint32_t unknown_a831 = vs->output_size;
+
tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
tu_cs_emit(cs, hs_info->tess.tcs_vertices_out);
/* Total attribute slots in HS incoming patch. */
- tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9801, 1);
- tu_cs_emit(cs,
- hs_info->tess.tcs_vertices_out * vs->output_size / 4);
+ tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
+ tu_cs_emit(cs, patch_control_points * vs->output_size / 4);
+
+ /* for A650 this value seems to be local memory size per wave */
+ if (vshs_workgroup) {
+ const uint32_t wavesize = 64;
+ /* note: if HS is really just the VS extended, then this
+ * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
+ * however that doesn't match the blob, and fails some dEQP tests.
+ */
+ uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
+ uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
+ unknown_a831 = DIV_ROUND_UP(total_size, wavesize);
+ }
tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
- tu_cs_emit(cs, vs->output_size);
+ tu_cs_emit(cs, unknown_a831);
+
/* In SPIR-V generated from GLSL, the tessellation primitive params are
* are specified in the tess eval shader, but in SPIR-V generated from
* HLSL, they are specified in the tess control shader. */
tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1);
tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size));
- tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9B07, 1);
+ tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_CNTL, 1);
tu_cs_emit(cs, 0);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1);
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
gl_shader_stage stage = MESA_SHADER_VERTEX;
+ uint32_t cps_per_patch = builder->create_info->pTessellationState ?
+ builder->create_info->pTessellationState->patchControlPoints : 0;
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
tu_cs_emit(cs, 0);
- tu6_emit_vpc(cs, vs, hs, ds, gs, fs);
+ tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch,
+ builder->device->physical_device->gpu_id == 650);
tu6_emit_vpc_varying_modes(cs, fs);
if (fs) {
}
if (gs || hs) {
- uint32_t cps_per_patch = builder->create_info->pTessellationState ?
- builder->create_info->pTessellationState->patchControlPoints : 0;
tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
}
}