tu_cs_emit(cs, xs->instrlen);
tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
- tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(xs->constlen, 4)) |
+ tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
A6XX_HLSQ_VS_CNTL_ENABLED);
/* emit program binary */
tu6_emit_fs_outputs(struct tu_cs *cs,
const struct ir3_shader_variant *fs,
uint32_t mrt_count, bool dual_src_blend,
- uint32_t render_components)
+ uint32_t render_components,
+ bool is_s8_uint)
{
uint32_t smask_regid, posz_regid;
enum a6xx_ztest_mode zmode;
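+ /* a stencil-only S8_UINT attachment also forces late-z */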
- if (fs->no_earlyz || fs->has_kill || fs->writes_pos) {
+ if (fs->no_earlyz || fs->has_kill || fs->writes_pos || is_s8_uint) {
zmode = A6XX_LATE_Z;
} else {
zmode = A6XX_EARLY_Z;
tu6_emit_fs_inputs(cs, fs);
tu6_emit_fs_outputs(cs, fs, builder->color_attachment_count,
builder->use_dual_src_blend,
- builder->render_components);
+ builder->render_components,
+ builder->depth_attachment_format == VK_FORMAT_S8_UINT);
} else {
/* TODO: check if these can be skipped if fs is disabled */
struct ir3_shader_variant dummy_variant = {};
tu6_emit_fs_inputs(cs, &dummy_variant);
tu6_emit_fs_outputs(cs, &dummy_variant, builder->color_attachment_count,
builder->use_dual_src_blend,
- builder->render_components);
+ builder->render_components,
+ builder->depth_attachment_format == VK_FORMAT_S8_UINT);
}
if (gs || hs) {
{
uint32_t vfd_decode_idx = 0;
uint32_t binding_instanced = 0; /* bitmask of instanced bindings */
+ uint32_t step_rate[MAX_VBS];
for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) {
const VkVertexInputBindingDescription *binding =
   &info->pVertexBindingDescriptions[i];
if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
   binding_instanced |= 1 << binding->binding;
*bindings_used |= 1 << binding->binding;
+ step_rate[binding->binding] = 1;
+ }
+
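+ /* VK_EXT_vertex_attribute_divisor: per-binding divisors override the
+  * default step rate of 1 set above
+  */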
+ const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state =
+ vk_find_struct_const(info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
+ if (div_state) {
+ for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) {
+ const VkVertexInputBindingDivisorDescriptionEXT *desc =
+ &div_state->pVertexBindingDivisors[i];
+ step_rate[desc->binding] = desc->divisor;
+ }
}
/* TODO: emit all VFD_DECODE/VFD_DEST_CNTL in same (two) pkt4 */
const VkVertexInputAttributeDescription *attr =
   &info->pVertexAttributeDescriptions[i];
uint32_t input_idx;
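+ /* the attribute must reference a binding declared above, so
+  * step_rate[attr->binding] is initialized before it is used below
+  */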
+ assert(*bindings_used & BIT(attr->binding));
+
for (input_idx = 0; input_idx < vs->inputs_count; input_idx++) {
if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == attr->location)
break;
.swap = format.swap,
.unk30 = 1,
._float = !vk_format_is_int(attr->format)),
- A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, 1));
+ A6XX_VFD_DECODE_STEP_RATE(vfd_decode_idx, step_rate[attr->binding]));
tu_cs_emit_regs(cs,
A6XX_VFD_DEST_CNTL_INSTR(vfd_decode_idx,
void
tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor)
{
- const VkOffset2D min = scissor->offset;
- const VkOffset2D max = {
+ VkOffset2D min = scissor->offset;
+ VkOffset2D max = {
scissor->offset.x + scissor->extent.width,
scissor->offset.y + scissor->extent.height,
};
- tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2);
- tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(min.x) |
- A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(min.y));
- tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(max.x - 1) |
- A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1));
+ /* special case for empty scissor with max == 0 to avoid overflow */
+ if (max.x == 0)
+ min.x = max.x = 1;
+ if (max.y == 0)
+ min.y = max.y = 1;
+
+ /* clamp to avoid overflow with a large scissor
+  * note: after clamping, the written BR (max - 1) never drops below min - 1,
+  * so an empty scissor stays empty
+  */
+ uint32_t scissor_max = BITFIELD_MASK(15);
+ min.x = MIN2(scissor_max, min.x);
+ min.y = MIN2(scissor_max, min.y);
+ max.x = MIN2(scissor_max, max.x);
+ max.y = MIN2(scissor_max, max.y);
+
+ tu_cs_emit_regs(cs,
+ A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0(.x = min.x, .y = min.y),
+ A6XX_GRAS_SC_SCREEN_SCISSOR_BR_0(.x = max.x - 1, .y = max.y - 1));
}
void
tu6_emit_depth_control(struct tu_cs *cs,
const VkPipelineDepthStencilStateCreateInfo *ds_info,
const VkPipelineRasterizationStateCreateInfo *rast_info)
{
- assert(!ds_info->depthBoundsTestEnable);
-
uint32_t rb_depth_cntl = 0;
if (ds_info->depthTestEnable) {
rb_depth_cntl |=
A6XX_RB_DEPTH_CNTL_Z_ENABLE |
A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) |
- A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+ A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; /* TODO: don't set for ALWAYS/NEVER */
if (rast_info->depthClampEnable)
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLAMP_ENABLE;
rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE;
}
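+ /* the depth bounds test requires Z_TEST_ENABLE even when the depth test
+  * itself is disabled
+  */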
+ if (ds_info->depthBoundsTestEnable)
+ rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE;
+
tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_CNTL, 1);
tu_cs_emit(cs, rb_depth_cntl);
}
tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder,
struct tu_pipeline *pipeline)
{
+ const struct ir3_compiler *compiler = builder->device->compiler;
const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
NULL
};
pipeline->tess.patch_type = key.tessellation;
- for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
- stage > MESA_SHADER_NONE; stage--) {
+ for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+ stage < MESA_SHADER_STAGES; stage++) {
if (!builder->shaders[stage])
continue;
return VK_ERROR_OUT_OF_HOST_MEMORY;
}
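+ /* ir3_trim_constlen reduces constlens to fit the combined HW limit and
+  * returns a bitmask of the stages that were trimmed; those stages are
+  * recompiled below with the safe_constlen key set
+  */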
+ uint32_t safe_constlens = ir3_trim_constlen(builder->variants, compiler);
+
+ key.safe_constlen = true;
+
+ for (gl_shader_stage stage = MESA_SHADER_VERTEX;
+ stage < MESA_SHADER_STAGES; stage++) {
+ if (!builder->shaders[stage])
+ continue;
+
+ if (safe_constlens & (1 << stage)) {
+ bool created;
+ builder->variants[stage] =
+ ir3_shader_get_variant(builder->shaders[stage]->ir3_shader,
+ &key, false, &created);
+ if (!builder->variants[stage])
+ return VK_ERROR_OUT_OF_HOST_MEMORY;
+ }
+ }
+
const struct tu_shader *vs = builder->shaders[MESA_SHADER_VERTEX];
struct ir3_shader_variant *variant;
variant = builder->variants[MESA_SHADER_VERTEX];
} else {
bool created;
+ key.safe_constlen = !!(safe_constlens & (1 << MESA_SHADER_VERTEX));
variant = ir3_shader_get_variant(vs->ir3_shader, &key,
true, &created);
if (!variant)
const VkPipelineRasterizationStateCreateInfo *rast_info =
builder->create_info->pRasterizationState;
- assert(rast_info->polygonMode == VK_POLYGON_MODE_FILL);
+ enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode);
struct tu_cs cs;
- tu_cs_begin_sub_stream(&pipeline->cs, 7, &cs);
+ tu_cs_begin_sub_stream(&pipeline->cs, 11, &cs);
tu_cs_emit_regs(&cs,
A6XX_GRAS_CL_CNTL(
.unk5 = rast_info->depthClampEnable,
.zero_gb_scale_z = 1,
.vp_clip_code_ignore = 1));
+
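+ /* the polygon mode is programmed in both the VPC and PC blocks */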
+ tu_cs_emit_regs(&cs,
+ A6XX_VPC_POLYGON_MODE(.mode = mode));
+
+ tu_cs_emit_regs(&cs,
+ A6XX_PC_POLYGON_MODE(.mode = mode));
+
/* move to hw ctx init? */
tu_cs_emit_regs(&cs, A6XX_GRAS_UNKNOWN_8001());
tu_cs_emit_regs(&cs,
pipeline->ds.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &cs);
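+ /* depth bounds min/max are baked into the pipeline unless
+  * VK_DYNAMIC_STATE_DEPTH_BOUNDS makes them dynamic
+  */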
+ if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) {
+ tu_cs_emit_regs(&cs,
+ A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds),
+ A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds));
+ }
+
if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) {
tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff,
.bfmask = ds_info->back.compareMask & 0xff));