* DEALINGS IN THE SOFTWARE.
*/
+#include "common/freedreno_guardband.h"
#include "tu_private.h"
#include "ir3/ir3_nir.h"
}
static unsigned
-tu6_load_state_size(struct tu_pipeline_layout *layout, bool compute)
+tu6_load_state_size(struct tu_pipeline *pipeline, bool compute)
{
const unsigned load_state_size = 4;
unsigned size = 0;
- for (unsigned i = 0; i < layout->num_sets; i++) {
- struct tu_descriptor_set_layout *set_layout = layout->set[i].layout;
+ for (unsigned i = 0; i < pipeline->layout->num_sets; i++) {
+ if (pipeline && !(pipeline->active_desc_sets & (1u << i)))
+ continue;
+
+ struct tu_descriptor_set_layout *set_layout = pipeline->layout->set[i].layout;
for (unsigned j = 0; j < set_layout->binding_count; j++) {
struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j];
unsigned count = 0;
binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT :
binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
unsigned stage_count = util_bitcount(stages);
+
+ if (!binding->array_size)
+ continue;
+
switch (binding->type) {
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
case VK_DESCRIPTOR_TYPE_SAMPLER:
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
- case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
/* Textures and UBOs need a packet for each stage */
*/
count = stage_count * binding->array_size * 2;
break;
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ break;
default:
unreachable("bad descriptor type");
}
static void
tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
{
- unsigned size = tu6_load_state_size(pipeline->layout, compute);
+ unsigned size = tu6_load_state_size(pipeline, compute);
if (size == 0)
return;
}
}
- pipeline->load_state.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
+ pipeline->load_state = tu_cs_end_draw_state(&pipeline->cs, &cs);
}
struct tu_pipeline_builder
const struct ir3_const_state *const_state = ir3_const_state(xs);
uint32_t base = const_state->offsets.immediate;
- int size = const_state->immediates_count;
+ int size = DIV_ROUND_UP(const_state->immediates_count, 4);
/* truncate size to avoid writing constants that shader
* does not use:
const struct ir3_shader_variant *v,
uint32_t binary_iova)
{
- tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
- tu_cs_emit(cs, 0xff);
+ tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+ .cs_state = true,
+ .cs_ibo = true));
tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v, binary_iova);
const struct ir3_shader_variant *hs,
const struct ir3_shader_variant *ds,
const struct ir3_shader_variant *gs,
- const struct ir3_shader_variant *fs)
+ const struct ir3_shader_variant *fs,
+ uint32_t patch_control_points,
+ bool vshs_workgroup)
{
/* note: doesn't compile as static because of the array regs.. */
const struct reg_config {
ir3_find_output_regid(last_shader, VARYING_SLOT_POS);
const uint32_t pointsize_regid =
ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ);
- const uint32_t layer_regid = gs ?
- ir3_find_output_regid(gs, VARYING_SLOT_LAYER) : regid(63, 0);
+ const uint32_t layer_regid =
+ ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER);
uint32_t primitive_regid = gs ?
ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : regid(63, 0);
uint32_t flags_regid = gs ?
tu6_setup_streamout(cs, last_shader, &linkage);
+ /* The GPU hangs on some models when there are no outputs (xs_pack::CNT),
+ * at least when a DS is the last stage, so add a dummy output to keep it
+ * happy if there aren't any. We do this late in order to avoid emitting
+ * any unused code and make sure that optimizations don't remove it.
+ */
+ if (linkage.cnt == 0)
+ ir3_link_add(&linkage, 0, 0x1, linkage.max_loc);
+
/* map outputs of the last shader to VPC */
assert(linkage.cnt <= 32);
const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2);
tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1);
tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER));
- tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1);
- tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU));
+ tu_cs_emit_regs(cs, A6XX_PC_PRIMID_PASSTHRU(primid_passthru));
tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1);
tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs ? fs->total_in : 0) |
if (hs) {
shader_info *hs_info = &hs->shader->nir->info;
+ uint32_t unknown_a831 = vs->output_size;
+
tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1);
tu_cs_emit(cs, hs_info->tess.tcs_vertices_out);
/* Total attribute slots in HS incoming patch. */
- tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9801, 1);
- tu_cs_emit(cs,
- hs_info->tess.tcs_vertices_out * vs->output_size / 4);
+ tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1);
+ tu_cs_emit(cs, patch_control_points * vs->output_size / 4);
+
+ /* for A650 this value seems to be local memory size per wave */
+ if (vshs_workgroup) {
+ const uint32_t wavesize = 64;
+ /* note: if HS is really just the VS extended, then this
+ * should be by MAX2(patch_control_points, hs_info->tess.tcs_vertices_out)
+ * however that doesn't match the blob, and fails some dEQP tests.
+ */
+ uint32_t prims_per_wave = wavesize / hs_info->tess.tcs_vertices_out;
+ uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave;
+ unknown_a831 = DIV_ROUND_UP(total_size, wavesize);
+ }
tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
- tu_cs_emit(cs, vs->output_size);
+ tu_cs_emit(cs, unknown_a831);
+
/* In SPIR-V generated from GLSL, the tessellation primitive params are
* specified in the tess eval shader, but in SPIR-V generated from
* HLSL, they are specified in the tess control shader. */
uint32_t render_components,
bool is_s8_uint)
{
- uint32_t smask_regid, posz_regid;
+ uint32_t smask_regid, posz_regid, stencilref_regid;
posz_regid = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH);
smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK);
+ stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL);
uint32_t fragdata_regid[8];
if (fs->color0_mrt) {
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) |
A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) |
- COND(dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE) |
- 0xfc000000);
+ A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) |
+ COND(dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8);
tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2);
tu_cs_emit(cs, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) |
COND(fs->writes_smask, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) |
+ COND(fs->writes_stencilref, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) |
COND(dual_src_blend, A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE));
tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count));
enum a6xx_ztest_mode zmode;
- if (fs->no_earlyz || fs->has_kill || fs->writes_pos || is_s8_uint) {
+ if (fs->no_earlyz || fs->has_kill || fs->writes_pos || fs->writes_stencilref || is_s8_uint) {
zmode = A6XX_LATE_Z;
} else {
zmode = A6XX_EARLY_Z;
const struct ir3_shader_variant *gs = builder->variants[MESA_SHADER_GEOMETRY];
const struct ir3_shader_variant *fs = builder->variants[MESA_SHADER_FRAGMENT];
gl_shader_stage stage = MESA_SHADER_VERTEX;
+ uint32_t cps_per_patch = builder->create_info->pTessellationState ?
+ builder->create_info->pTessellationState->patchControlPoints : 0;
STATIC_ASSERT(MESA_SHADER_VERTEX == 0);
- tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
- tu_cs_emit(cs, 0xff); /* XXX */
+ tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
+ .vs_state = true,
+ .hs_state = true,
+ .ds_state = true,
+ .gs_state = true,
+ .fs_state = true,
+ .gfx_ibo = true));
/* Don't use the binning pass variant when GS is present because we don't
* support compiling correct binning pass variants with GS.
tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_UNKNOWN_A831, 1);
tu_cs_emit(cs, 0);
- tu6_emit_vpc(cs, vs, hs, ds, gs, fs);
+ tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch,
+ builder->device->physical_device->gpu_id == 650);
tu6_emit_vpc_varying_modes(cs, fs);
if (fs) {
}
if (gs || hs) {
- uint32_t cps_per_patch = builder->create_info->pTessellationState ?
- builder->create_info->pTessellationState->patchControlPoints : 0;
tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch);
}
}
.decode_cnt = vfd_decode_idx));
}
-static uint32_t
-tu6_guardband_adj(uint32_t v)
-{
- if (v > 256)
- return (uint32_t)(511.0 - 65.0 * (log2(v) - 8.0));
- else
- return 511;
-}
-
void
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
{
assert(min.y >= 0 && min.y < max.y);
VkExtent2D guardband_adj;
- guardband_adj.width = tu6_guardband_adj(max.x - min.x);
- guardband_adj.height = tu6_guardband_adj(max.y - min.y);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]).value);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]).value);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]).value);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]).value);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]).value);
- tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]).value);
-
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
- tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(min.y));
- tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(max.x - 1) |
- A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(max.y - 1));
+ guardband_adj.width = fd_calc_guardband(offsets[0], scales[0], false);
+ guardband_adj.height = fd_calc_guardband(offsets[1], scales[1], false);
+
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_CL_VPORT_XOFFSET(0, offsets[0]),
+ A6XX_GRAS_CL_VPORT_XSCALE(0, scales[0]),
+ A6XX_GRAS_CL_VPORT_YOFFSET(0, offsets[1]),
+ A6XX_GRAS_CL_VPORT_YSCALE(0, scales[1]),
+ A6XX_GRAS_CL_VPORT_ZOFFSET(0, offsets[2]),
+ A6XX_GRAS_CL_VPORT_ZSCALE(0, scales[2]));
+
+ tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), 2);
+ tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) |
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y));
+ tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(max.x - 1) |
+ A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(max.y - 1));
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1);
tu_cs_emit(cs,
float z_clamp_max = MAX2(viewport->minDepth, viewport->maxDepth);
tu_cs_emit_regs(cs,
- A6XX_GRAS_CL_Z_CLAMP_MIN(z_clamp_min),
- A6XX_GRAS_CL_Z_CLAMP_MAX(z_clamp_max));
+ A6XX_GRAS_CL_Z_CLAMP_MIN(0, z_clamp_min),
+ A6XX_GRAS_CL_Z_CLAMP_MAX(0, z_clamp_max));
tu_cs_emit_regs(cs,
A6XX_RB_Z_CLAMP_MIN(z_clamp_min),
max.y = MIN2(scissor_max, max.y);
tu_cs_emit_regs(cs,
- A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = min.x, .y = min.y),
- A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = max.x - 1, .y = max.y - 1));
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = min.x, .y = min.y),
+ A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = max.x - 1, .y = max.y - 1));
}
void
struct tu_pipeline_builder *builder,
struct ir3_shader_variant *compute)
{
- uint32_t size = 2048 + tu6_load_state_size(pipeline->layout, compute);
+ uint32_t size = 2048 + tu6_load_state_size(pipeline, compute);
/* graphics case: */
if (builder) {
struct tu_cs prog_cs;
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
tu6_emit_program(&prog_cs, builder, false);
- pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
+ pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
tu6_emit_program(&prog_cs, builder, true);
- pipeline->program.binning_state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
+ pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
VkShaderStageFlags stages = 0;
for (unsigned i = 0; i < builder->create_info->stageCount; i++) {
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(&vi_cs, vs, vi_info,
&pipeline->vi.bindings_used);
- pipeline->vi.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
+ pipeline->vi.state = tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
if (bs) {
tu_cs_begin_sub_stream(&pipeline->cs,
MAX_VERTEX_ATTRIBS * 7 + 2, &vi_cs);
tu6_emit_vertex_input(
&vi_cs, bs, vi_info, &pipeline->vi.bindings_used);
- pipeline->vi.binning_state_ib =
- tu_cs_end_sub_stream(&pipeline->cs, &vi_cs);
+ pipeline->vi.binning_state =
+ tu_cs_end_draw_state(&pipeline->cs, &vi_cs);
}
}
tu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs,
uint32_t id, uint32_t size)
{
- struct tu_cs_memory memory;
+ assert(id < ARRAY_SIZE(pipeline->dynamic_state));
if (pipeline->dynamic_state_mask & BIT(id))
return false;
- /* TODO: share this logc with tu_cmd_dynamic_state */
- tu_cs_alloc(&pipeline->cs, size, 1, &memory);
- tu_cs_init_external(cs, memory.map, memory.map + size);
- tu_cs_begin(cs);
- tu_cs_reserve_space(cs, size);
-
- assert(id < ARRAY_SIZE(pipeline->dynamic_state));
- pipeline->dynamic_state[id].iova = memory.iova;
- pipeline->dynamic_state[id].size = size;
+ pipeline->dynamic_state[id] = tu_cs_draw_state(&pipeline->cs, cs, size);
return true;
}
enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
+ bool depth_clip_disable = rast_info->depthClampEnable;
+
+ const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+ vk_find_struct_const(rast_info, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+ if (depth_clip_state)
+ depth_clip_disable = !depth_clip_state->depthClipEnable;
+
struct tu_cs cs;
- tu_cs_begin_sub_stream(&pipeline->cs, 9, &cs);
+ pipeline->rast_state = tu_cs_draw_state(&pipeline->cs, &cs, 9);
tu_cs_emit_regs(&cs,
A6XX_GRAS_CL_CNTL(
- .znear_clip_disable = rast_info->depthClampEnable,
- .zfar_clip_disable = rast_info->depthClampEnable,
+ .znear_clip_disable = depth_clip_disable,
+ .zfar_clip_disable = depth_clip_disable,
+ /* TODO should this be depth_clip_disable instead? */
.unk5 = rast_info->depthClampEnable,
.zero_gb_scale_z = 1,
.vp_clip_code_ignore = 1));
tu_cs_emit_regs(&cs,
- A6XX_VPC_POLYGON_MODE(.mode = mode));
+ A6XX_VPC_POLYGON_MODE(mode));
tu_cs_emit_regs(&cs,
- A6XX_PC_POLYGON_MODE(.mode = mode));
+ A6XX_PC_POLYGON_MODE(mode));
/* move to hw ctx init? */
tu_cs_emit_regs(&cs,
A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f),
A6XX_GRAS_SU_POINT_SIZE(1.0f));
- pipeline->rast.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
-
pipeline->gras_su_cntl =
tu6_gras_su_cntl(rast_info, builder->samples);
? ds_info : &dummy_ds_info;
struct tu_cs cs;
- tu_cs_begin_sub_stream(&pipeline->cs, 6, &cs);
+ pipeline->ds_state = tu_cs_draw_state(&pipeline->cs, &cs, 6);
/* move to hw ctx init? */
tu_cs_emit_regs(&cs, A6XX_RB_ALPHA_CONTROL());
builder->create_info->pRasterizationState);
tu6_emit_stencil_control(&cs, ds_info);
- pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
-
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
tu_cs_emit_regs(&cs,
A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds),
: &dummy_blend_info;
struct tu_cs cs;
- tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 4, &cs);
+ pipeline->blend_state =
+ tu_cs_draw_state(&pipeline->cs, &cs, blend_info->attachmentCount * 3 + 4);
uint32_t blend_enable_mask;
tu6_emit_rb_mrt_controls(&cs, blend_info,
tu6_emit_blend_control(&cs, blend_enable_mask,
builder->use_dual_src_blend, msaa_info);
- pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
+ assert(cs.cur == cs.end); /* validate draw state size */
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) {
tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4);
struct tu_cs prog_cs;
tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs);
tu6_emit_cs_config(&prog_cs, shader, v, shader_iova);
- pipeline->program.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &prog_cs);
+ pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs);
tu6_emit_load_state(pipeline, true);