+ if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
+ (!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (pipeline->shaders[i])
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+ }
+ return;
+ }
+
+ if (!modules[MESA_SHADER_FRAGMENT] && !modules[MESA_SHADER_COMPUTE]) {
+ nir_builder fs_b;
+ nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
+ fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
+ fs_m.nir = fs_b.shader;
+ modules[MESA_SHADER_FRAGMENT] = &fs_m;
+ }
+
+ /* Determine first and last stage. */
+ unsigned first = MESA_SHADER_STAGES;
+ unsigned last = 0;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!pStages[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
+ }
+
+ int prev = -1;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ const VkPipelineShaderStageCreateInfo *stage = pStages[i];
+
+ if (!modules[i])
+ continue;
+
+ nir[i] = radv_shader_compile_to_nir(device, modules[i],
+ stage ? stage->pName : "main", i,
+ stage ? stage->pSpecializationInfo : NULL);
+ pipeline->active_stages |= mesa_to_vk_shader_stage(i);
+
+ /* We don't want to alter meta shaders IR directly so clone it
+ * first.
+ */
+ if (nir[i]->info.name) {
+ nir[i] = nir_shader_clone(NULL, nir[i]);
+ }
+
+ if (first != last) {
+ nir_variable_mode mask = 0;
+
+ if (i != first)
+ mask = mask | nir_var_shader_in;
+
+ if (i != last)
+ mask = mask | nir_var_shader_out;
+
+ nir_lower_io_to_scalar_early(nir[i], mask);
+ radv_optimize_nir(nir[i]);
+ }
+
+ if (prev != -1) {
+ nir_compact_varyings(nir[prev], nir[i], true);
+ }
+ prev = i;
+ }
+
+ if (nir[MESA_SHADER_TESS_CTRL]) {
+ nir_lower_tes_patch_vertices(nir[MESA_SHADER_TESS_EVAL], nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out);
+ merge_tess_info(&nir[MESA_SHADER_TESS_EVAL]->info, &nir[MESA_SHADER_TESS_CTRL]->info);
+ }
+
+ radv_link_shaders(pipeline, nir);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if (modules[i] && radv_can_dump_shader(device, modules[i]))
+ nir_print_shader(nir[i], stderr);
+ }
+
+ radv_fill_shader_keys(keys, &key, nir);
+
+ if (nir[MESA_SHADER_FRAGMENT]) {
+ if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
+ pipeline->shaders[MESA_SHADER_FRAGMENT] =
+ radv_shader_variant_create(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
+ pipeline->layout, keys + MESA_SHADER_FRAGMENT,
+ &codes[MESA_SHADER_FRAGMENT], &code_sizes[MESA_SHADER_FRAGMENT]);
+ }
+
+ /* TODO: These are no longer used as keys we should refactor this */
+ keys[MESA_SHADER_VERTEX].vs.export_prim_id =
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
+ keys[MESA_SHADER_TESS_EVAL].tes.export_prim_id =
+ pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_TESS_CTRL]) {
+ if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) {
+ struct nir_shader *combined_nir[] = {nir[MESA_SHADER_VERTEX], nir[MESA_SHADER_TESS_CTRL]};
+ struct ac_shader_variant_key key = keys[MESA_SHADER_TESS_CTRL];
+ key.tcs.vs_key = keys[MESA_SHADER_VERTEX].vs;
+ pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_create(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
+ pipeline->layout,
+ &key, &codes[MESA_SHADER_TESS_CTRL],
+ &code_sizes[MESA_SHADER_TESS_CTRL]);
+ }
+ modules[MESA_SHADER_VERTEX] = NULL;
+ }
+
+ if (device->physical_device->rad_info.chip_class >= GFX9 && modules[MESA_SHADER_GEOMETRY]) {
+ gl_shader_stage pre_stage = modules[MESA_SHADER_TESS_EVAL] ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
+ if (!pipeline->shaders[MESA_SHADER_GEOMETRY]) {
+ struct nir_shader *combined_nir[] = {nir[pre_stage], nir[MESA_SHADER_GEOMETRY]};
+ pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_create(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
+ pipeline->layout,
+ &keys[pre_stage] , &codes[MESA_SHADER_GEOMETRY],
+ &code_sizes[MESA_SHADER_GEOMETRY]);
+ }
+ modules[pre_stage] = NULL;
+ }
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ if(modules[i] && !pipeline->shaders[i]) {
+ pipeline->shaders[i] = radv_shader_variant_create(device, modules[i], &nir[i], 1,
+ pipeline->layout,
+ keys + i, &codes[i],
+ &code_sizes[i]);
+ }
+ }
+
+ if(modules[MESA_SHADER_GEOMETRY]) {
+ void *gs_copy_code = NULL;
+ unsigned gs_copy_code_size = 0;
+ if (!pipeline->gs_copy_shader) {
+ pipeline->gs_copy_shader = radv_create_gs_copy_shader(
+ device, nir[MESA_SHADER_GEOMETRY], &gs_copy_code,
+ &gs_copy_code_size,
+ keys[MESA_SHADER_GEOMETRY].has_multiview_view_index);
+ }
+
+ if (pipeline->gs_copy_shader) {
+ void *code[MESA_SHADER_STAGES] = {0};
+ unsigned code_size[MESA_SHADER_STAGES] = {0};
+ struct radv_shader_variant *variants[MESA_SHADER_STAGES] = {0};
+
+ code[MESA_SHADER_GEOMETRY] = gs_copy_code;
+ code_size[MESA_SHADER_GEOMETRY] = gs_copy_code_size;
+ variants[MESA_SHADER_GEOMETRY] = pipeline->gs_copy_shader;
+
+ radv_pipeline_cache_insert_shaders(device, cache,
+ gs_copy_hash,
+ variants,
+ (const void**)code,
+ code_size);
+ }
+ free(gs_copy_code);
+ }
+
+ radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline->shaders,
+ (const void**)codes, code_sizes);
+
+ for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
+ free(codes[i]);
+ if (modules[i] && !pipeline->device->keep_shader_info)
+ ralloc_free(nir[i]);
+ }
+
+ if (fs_m.nir)
+ ralloc_free(fs_m.nir);
+}
+
+/* Map an API shader stage to the register offset of its first user-data SGPR
+ * (SPI_SHADER_USER_DATA_*_0, or COMPUTE_USER_DATA_0 for compute).
+ *
+ * The answer depends on which hardware stage the API stage actually runs on,
+ * which in turn depends on whether the pipeline uses tessellation and/or
+ * geometry and on the GPU generation: on GFX9+ some API stages are merged
+ * into the following hardware stage (e.g. vertex runs as LS with tess, as ES
+ * with geometry).
+ */
+static uint32_t
+radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline,
+ gl_shader_stage stage, enum chip_class chip_class)
+{
+ bool has_gs = radv_pipeline_has_gs(pipeline);
+ bool has_tess = radv_pipeline_has_tess(pipeline);
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ return R_00B030_SPI_SHADER_USER_DATA_PS_0;
+ case MESA_SHADER_VERTEX:
+ if (chip_class >= GFX9) {
+ /* GFX9+: vertex is merged into LS (tess) or ES (geometry). */
+ return has_tess ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
+ has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ if (has_tess)
+ return R_00B530_SPI_SHADER_USER_DATA_LS_0;
+ else
+ return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 : R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ case MESA_SHADER_GEOMETRY:
+ /* GFX9+: geometry is merged with its preceding ES stage. */
+ return chip_class >= GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B230_SPI_SHADER_USER_DATA_GS_0;
+ case MESA_SHADER_COMPUTE:
+ return R_00B900_COMPUTE_USER_DATA_0;
+ case MESA_SHADER_TESS_CTRL:
+ /* GFX9+: TCS runs merged with the vertex shader as LS/HS. */
+ return chip_class >= GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 :
+ R_00B430_SPI_SHADER_USER_DATA_HS_0;
+ case MESA_SHADER_TESS_EVAL:
+ if (chip_class >= GFX9) {
+ return has_gs ? R_00B330_SPI_SHADER_USER_DATA_ES_0 :
+ R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ }
+ if (has_gs)
+ return R_00B330_SPI_SHADER_USER_DATA_ES_0;
+ else
+ return R_00B130_SPI_SHADER_USER_DATA_VS_0;
+ default:
+ unreachable("unknown shader");
+ }
+}
+
+/* One row of a binning-size lookup table: 'extent' is the bin size to use
+ * while the estimated bytes-per-pixel cost is below the NEXT row's 'bpp'
+ * threshold.  A {0, 0} extent means "too expensive, disable binning" (the
+ * caller checks for zero width/height).
+ */
+struct radv_bin_size_entry {
+ unsigned bpp;
+ VkExtent2D extent;
+};
+
+/* Compute the primitive-binning bin size for a graphics pipeline.
+ *
+ * Estimates the bytes written per pixel for the bound color attachments and
+ * for the depth/stencil attachment, then looks each estimate up in tables
+ * indexed by [log2(RBs per SE)][log2(SE count)].  The returned extent is the
+ * minimum of the color and depth/stencil results; a {0, 0} extent tells the
+ * caller to disable binning.  Thresholds and coefficients follow AMDVLK (see
+ * the comment at the depth/stencil computation below).
+ */
+static VkExtent2D
+radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ static const struct radv_bin_size_entry color_size_table[][3][9] = {
+ {
+ /* One RB / SE */
+ {
+ /* One shader engine */
+ { 0, {128, 128}},
+ { 1, { 64, 128}},
+ { 2, { 32, 128}},
+ { 3, { 16, 128}},
+ { 17, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Two shader engines */
+ { 0, {128, 128}},
+ { 2, { 64, 128}},
+ { 3, { 32, 128}},
+ { 5, { 16, 128}},
+ { 17, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Four shader engines */
+ { 0, {128, 128}},
+ { 3, { 64, 128}},
+ { 5, { 16, 128}},
+ { 17, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ {
+ /* Two RB / SE */
+ {
+ /* One shader engine */
+ { 0, {128, 128}},
+ { 2, { 64, 128}},
+ { 3, { 32, 128}},
+ { 5, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Two shader engines */
+ { 0, {128, 128}},
+ { 3, { 64, 128}},
+ { 5, { 32, 128}},
+ { 9, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Four shader engines */
+ { 0, {256, 256}},
+ { 2, {128, 256}},
+ { 3, {128, 128}},
+ { 5, { 64, 128}},
+ { 9, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ {
+ /* Four RB / SE */
+ {
+ /* One shader engine */
+ { 0, {128, 256}},
+ { 2, {128, 128}},
+ { 3, { 64, 128}},
+ { 5, { 32, 128}},
+ { 9, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Two shader engines */
+ { 0, {256, 256}},
+ { 2, {128, 256}},
+ { 3, {128, 128}},
+ { 5, { 64, 128}},
+ { 9, { 32, 128}},
+ { 17, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ /* Four shader engines */
+ { 0, {256, 512}},
+ { 2, {256, 256}},
+ { 3, {128, 256}},
+ { 5, {128, 128}},
+ { 9, { 64, 128}},
+ { 17, { 16, 128}},
+ { 33, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ };
+ static const struct radv_bin_size_entry ds_size_table[][3][9] = {
+ {
+ // One RB / SE
+ {
+ // One shader engine
+ { 0, {128, 256}},
+ { 2, {128, 128}},
+ { 4, { 64, 128}},
+ { 7, { 32, 128}},
+ { 13, { 16, 128}},
+ { 49, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Two shader engines
+ { 0, {256, 256}},
+ { 2, {128, 256}},
+ { 4, {128, 128}},
+ { 7, { 64, 128}},
+ { 13, { 32, 128}},
+ { 25, { 16, 128}},
+ { 49, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Four shader engines
+ { 0, {256, 512}},
+ { 2, {256, 256}},
+ { 4, {128, 256}},
+ { 7, {128, 128}},
+ { 13, { 64, 128}},
+ { 25, { 16, 128}},
+ { 49, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ {
+ // Two RB / SE
+ {
+ // One shader engine
+ { 0, {256, 256}},
+ { 2, {128, 256}},
+ { 4, {128, 128}},
+ { 7, { 64, 128}},
+ { 13, { 32, 128}},
+ { 25, { 16, 128}},
+ { 97, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Two shader engines
+ { 0, {256, 512}},
+ { 2, {256, 256}},
+ { 4, {128, 256}},
+ { 7, {128, 128}},
+ { 13, { 64, 128}},
+ { 25, { 32, 128}},
+ { 49, { 16, 128}},
+ { 97, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Four shader engines
+ { 0, {512, 512}},
+ { 2, {256, 512}},
+ { 4, {256, 256}},
+ { 7, {128, 256}},
+ { 13, {128, 128}},
+ { 25, { 64, 128}},
+ { 49, { 16, 128}},
+ { 97, { 0, 0}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ {
+ // Four RB / SE
+ {
+ // One shader engine
+ { 0, {256, 512}},
+ { 2, {256, 256}},
+ { 4, {128, 256}},
+ { 7, {128, 128}},
+ { 13, { 64, 128}},
+ { 25, { 32, 128}},
+ { 49, { 16, 128}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Two shader engines
+ { 0, {512, 512}},
+ { 2, {256, 512}},
+ { 4, {256, 256}},
+ { 7, {128, 256}},
+ { 13, {128, 128}},
+ { 25, { 64, 128}},
+ { 49, { 32, 128}},
+ { 97, { 16, 128}},
+ { UINT_MAX, { 0, 0}},
+ },
+ {
+ // Four shader engines
+ { 0, {512, 512}},
+ { 4, {256, 512}},
+ { 7, {256, 256}},
+ { 13, {128, 256}},
+ { 25, {128, 128}},
+ { 49, { 64, 128}},
+ { 97, { 16, 128}},
+ { UINT_MAX, { 0, 0}},
+ },
+ },
+ };
+
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ VkExtent2D extent = {512, 512};
+
+ /* Table indices: log2 of render backends per SE and log2 of SE count. */
+ unsigned log_num_rb_per_se =
+ util_logbase2_ceil(pipeline->device->physical_device->rad_info.num_render_backends /
+ pipeline->device->physical_device->rad_info.max_se);
+ unsigned log_num_se = util_logbase2_ceil(pipeline->device->physical_device->rad_info.max_se);
+
+ unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->graphics.ms.pa_sc_mode_cntl_1);
+ unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->graphics.ms.db_eqaa);
+ unsigned effective_samples = total_samples;
+ unsigned color_bytes_per_pixel = 0;
+
+ /* Sum the per-pixel byte cost of the color attachments that are both used
+ * by the subpass and actually written (non-zero color write mask).
+ */
+ const VkPipelineColorBlendStateCreateInfo *vkblend = pCreateInfo->pColorBlendState;
+ if (vkblend) {
+ for (unsigned i = 0; i < subpass->color_count; i++) {
+ if (!vkblend->pAttachments[i].colorWriteMask)
+ continue;
+
+ if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
+ continue;
+
+ VkFormat format = pass->attachments[subpass->color_attachments[i].attachment].format;
+ color_bytes_per_pixel += vk_format_get_blocksize(format);
+ }
+
+ /* MSAA images typically don't use all samples all the time. */
+ if (effective_samples >= 2 && ps_iter_samples <= 1)
+ effective_samples = 2;
+ color_bytes_per_pixel *= effective_samples;
+ }
+
+ /* Walk to the first row whose threshold exceeds the estimated cost. */
+ const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se];
+ while(color_entry->bpp <= color_bytes_per_pixel)
+ ++color_entry;
+
+ extent = color_entry->extent;
+
+ if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+ struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+
+ /* Coefficients taken from AMDVLK */
+ unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
+ unsigned stencil_coeff = vk_format_is_stencil(attachment->format) ? 1 : 0;
+ unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples;
+
+ const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se];
+ while(ds_entry->bpp <= ds_bytes_per_pixel)
+ ++ds_entry;
+
+ /* Final extent is the minimum of the color and depth/stencil answers. */
+ extent.width = MIN2(extent.width, ds_entry->extent.width);
+ extent.height = MIN2(extent.height, ds_entry->extent.height);
+ }
+
+ return extent;
+}
+
+/* Emit the GFX9 primitive-binning registers (PA_SC_BINNER_CNTL_0 and
+ * DB_DFSM_CONTROL) for a graphics pipeline.  No-op before GFX9, where these
+ * registers do not exist.  Binning is enabled only when the device allows it
+ * and radv_compute_bin_size() returned a non-zero extent; otherwise the
+ * default value programs binning disabled via the legacy scan converter.
+ */
+static void
+radv_pipeline_generate_binning_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo)
+{
+ if (pipeline->device->physical_device->rad_info.chip_class < GFX9)
+ return;
+
+ /* Default: binning disabled, legacy scan converter. */
+ uint32_t pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+ S_028C44_DISABLE_START_OF_PRIM(1);
+ uint32_t db_dfsm_control = S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF);
+
+ VkExtent2D bin_size = radv_compute_bin_size(pipeline, pCreateInfo);
+
+ /* Per-family tuning knobs for the binner. */
+ unsigned context_states_per_bin; /* allowed range: [1, 6] */
+ unsigned persistent_states_per_bin; /* allowed range: [1, 32] */
+ unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
+
+ switch (pipeline->device->physical_device->rad_info.family) {
+ case CHIP_VEGA10:
+ context_states_per_bin = 1;
+ persistent_states_per_bin = 1;
+ fpovs_per_batch = 63;
+ break;
+ case CHIP_RAVEN:
+ context_states_per_bin = 6;
+ persistent_states_per_bin = 32;
+ fpovs_per_batch = 63;
+ break;
+ default:
+ /* NOTE(review): any new GFX9+ family must get an entry above or this
+ * aborts (debug) / is UB (release) — confirm when adding chips. */
+ unreachable("unhandled family while determining binning state.");
+ }
+
+ if (pipeline->device->pbb_allowed && bin_size.width && bin_size.height) {
+ /* Bin size encoding: a size of 16 is selected by the BIN_SIZE_X/Y bit;
+ * larger sizes go in the *_EXTEND fields as log2(size) - 5.
+ */
+ pa_sc_binner_cntl_0 =
+ S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) |
+ S_028C44_BIN_SIZE_X(bin_size.width == 16) |
+ S_028C44_BIN_SIZE_Y(bin_size.height == 16) |
+ S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) |
+ S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) |
+ S_028C44_CONTEXT_STATES_PER_BIN(context_states_per_bin - 1) |
+ S_028C44_PERSISTENT_STATES_PER_BIN(persistent_states_per_bin - 1) |
+ S_028C44_DISABLE_START_OF_PRIM(1) |
+ S_028C44_FPOVS_PER_BATCH(fpovs_per_batch) |
+ S_028C44_OPTIMAL_BIN_SELECTION(1);
+ }
+
+ radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
+ pa_sc_binner_cntl_0);
+ radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
+ db_dfsm_control);
+}
+
+
+/* Emit the depth/stencil registers (DB_DEPTH_CONTROL, DB_STENCIL_CONTROL,
+ * DB_RENDER_CONTROL, DB_RENDER_OVERRIDE2) from the pipeline's
+ * VkPipelineDepthStencilStateCreateInfo and the subpass attachment.
+ *
+ * Depth/stencil state is only honored when the subpass actually has a
+ * depth/stencil attachment with the corresponding aspect.  'extra' carries
+ * non-API overrides (clears, resummarize, in-place flushes) — presumably
+ * used by the driver's internal meta pipelines; confirm against callers.
+ */
+static void
+radv_pipeline_generate_depth_stencil_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const struct radv_graphics_pipeline_create_info *extra)
+{
+ const VkPipelineDepthStencilStateCreateInfo *vkds = pCreateInfo->pDepthStencilState;
+ RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
+ struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
+ struct radv_render_pass_attachment *attachment = NULL;
+ uint32_t db_depth_control = 0, db_stencil_control = 0;
+ uint32_t db_render_control = 0, db_render_override2 = 0;
+
+ if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED)
+ attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+
+ bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
+ bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
+
+ if (vkds && has_depth_attachment) {
+ db_depth_control = S_028800_Z_ENABLE(vkds->depthTestEnable ? 1 : 0) |
+ S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
+ S_028800_ZFUNC(vkds->depthCompareOp) |
+ S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);
+
+ /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
+ db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
+ }
+
+ if (has_stencil_attachment && vkds && vkds->stencilTestEnable) {
+ /* Front-face stencil state. */
+ db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
+ db_depth_control |= S_028800_STENCILFUNC(vkds->front.compareOp);
+ db_stencil_control |= S_02842C_STENCILFAIL(si_translate_stencil_op(vkds->front.failOp));
+ db_stencil_control |= S_02842C_STENCILZPASS(si_translate_stencil_op(vkds->front.passOp));
+ db_stencil_control |= S_02842C_STENCILZFAIL(si_translate_stencil_op(vkds->front.depthFailOp));
+
+ /* Back-face stencil state (the *_BF fields). */
+ db_depth_control |= S_028800_STENCILFUNC_BF(vkds->back.compareOp);
+ db_stencil_control |= S_02842C_STENCILFAIL_BF(si_translate_stencil_op(vkds->back.failOp));
+ db_stencil_control |= S_02842C_STENCILZPASS_BF(si_translate_stencil_op(vkds->back.passOp));
+ db_stencil_control |= S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(vkds->back.depthFailOp));
+ }
+
+ if (attachment && extra) {
+ db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear);
+ db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear);
+
+ db_render_control |= S_028000_RESUMMARIZE_ENABLE(extra->db_resummarize);
+ db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->db_flush_depth_inplace);
+ db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->db_flush_stencil_inplace);
+ db_render_override2 |= S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(extra->db_depth_disable_expclear);
+ db_render_override2 |= S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(extra->db_stencil_disable_expclear);
+ }
+
+ radeon_set_context_reg(cs, R_028800_DB_DEPTH_CONTROL, db_depth_control);
+ radeon_set_context_reg(cs, R_02842C_DB_STENCIL_CONTROL, db_stencil_control);
+
+ radeon_set_context_reg(cs, R_028000_DB_RENDER_CONTROL, db_render_control);
+ radeon_set_context_reg(cs, R_028010_DB_RENDER_OVERRIDE2, db_render_override2);
+}
+
+/* Emit the precomputed color-blend registers from 'blend': the eight
+ * per-MRT CB_BLEND*_CONTROL values, color control, alpha-to-mask, the
+ * SPI color export format, and the target/shader write masks.
+ */
+static void
+radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_blend_state *blend)
+{
+ radeon_set_context_reg_seq(cs, R_028780_CB_BLEND0_CONTROL, 8);
+ radeon_emit_array(cs, blend->cb_blend_control,
+ 8);
+ radeon_set_context_reg(cs, R_028808_CB_COLOR_CONTROL, blend->cb_color_control);
+ radeon_set_context_reg(cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask);
+
+ if (pipeline->device->physical_device->has_rbplus) {
+
+ radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
+ radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
+
+ /* RB+ downconvert registers are written as zero here — presumably the
+ * optimization is programmed elsewhere or intentionally left off for
+ * these pipelines; confirm against the rest of the driver. */
+ radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
+ radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */
+ radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */
+ radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */
+ }
+
+ radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
+
+ radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
+ radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
+}
+
+
+/* Emit the precomputed rasterizer registers (clip control, interpolation
+ * control, vertex quantization, and polygon/cull mode) stored in
+ * pipeline->graphics.raster.
+ */
+static void
+radv_pipeline_generate_raster_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{
+ struct radv_raster_state *raster = &pipeline->graphics.raster;
+
+ radeon_set_context_reg(cs, R_028810_PA_CL_CLIP_CNTL,
+ raster->pa_cl_clip_cntl);
+ radeon_set_context_reg(cs, R_0286D4_SPI_INTERP_CONTROL_0,
+ raster->spi_interp_control);
+ radeon_set_context_reg(cs, R_028BE4_PA_SU_VTX_CNTL,
+ raster->pa_su_vtx_cntl);
+ radeon_set_context_reg(cs, R_028814_PA_SU_SC_MODE_CNTL,
+ raster->pa_su_sc_mode_cntl);
+}
+
+
+/* Emit the precomputed multisample registers (AA sample masks, EQAA, mode
+ * control) and, when the fragment shader reads gl_SamplePosition, load the
+ * sample-positions user SGPR with an offset derived from the sample count.
+ */
+static void
+radv_pipeline_generate_multisample_state(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{
+ struct radv_multisample_state *ms = &pipeline->graphics.ms;
+
+ radeon_set_context_reg_seq(cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2);
+ radeon_emit(cs, ms->pa_sc_aa_mask[0]);
+ radeon_emit(cs, ms->pa_sc_aa_mask[1]);
+
+ radeon_set_context_reg(cs, R_028804_DB_EQAA, ms->db_eqaa);
+ radeon_set_context_reg(cs, R_028A4C_PA_SC_MODE_CNTL_1, ms->pa_sc_mode_cntl_1);
+
+ if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.info.ps.needs_sample_positions) {
+ uint32_t offset;
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_FRAGMENT, AC_UD_PS_SAMPLE_POS_OFFSET);
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_FRAGMENT];
+ /* The shader may not have been assigned the SGPR; nothing to do then. */
+ if (loc->sgpr_idx == -1)
+ return;
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+ /* Offsets are cumulative (0, 1, 1+2, 1+2+4, 1+2+4+8) — presumably an
+ * index into a buffer that stores the 1x, 2x, 4x, 8x and 16x sample
+ * positions back to back; confirm against the sample-positions setup. */
+ switch (pipeline->graphics.ms.num_samples) {
+ default:
+ offset = 0;
+ break;
+ case 2:
+ offset = 1;
+ break;
+ case 4:
+ offset = 3;
+ break;
+ case 8:
+ offset = 7;
+ break;
+ case 16:
+ offset = 15;
+ break;
+ }
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, offset);
+ }
+}
+
+/* Emit VGT_GS_MODE and VGT_PRIMITIVEID_EN.  With a geometry shader the mode
+ * comes from the GS's output vertex count; otherwise, if the last VS-like
+ * stage exports the primitive ID, GS "scenario A" mode is used together with
+ * the primitive-ID enable so the value reaches the fragment shader.
+ */
+static void
+radv_pipeline_generate_vgt_gs_mode(struct radeon_winsys_cs *cs,
+ const struct radv_pipeline *pipeline)
+{
+ const struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
+
+ uint32_t vgt_primitiveid_en = false;
+ uint32_t vgt_gs_mode = 0;
+
+ if (radv_pipeline_has_gs(pipeline)) {
+ const struct radv_shader_variant *gs =
+ pipeline->shaders[MESA_SHADER_GEOMETRY];
+
+ vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out,
+ pipeline->device->physical_device->rad_info.chip_class);
+ } else if (outinfo->export_prim_id) {
+ vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+ vgt_primitiveid_en = true;
+ }
+
+ radeon_set_context_reg(cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en);
+ radeon_set_context_reg(cs, R_028A40_VGT_GS_MODE, vgt_gs_mode);
+}
+
+/* Program a shader variant as the hardware VS stage: its program address and
+ * resource registers, plus the SPI/PA output configuration (param and
+ * position export formats, viewport transform enables, clip/cull distance
+ * and misc-vector enables) derived from the shader's output info.
+ */
+static void
+radv_pipeline_generate_hw_vs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ /* PGM_LO/PGM_HI take the address in 256-byte units, then RSRC1/RSRC2. */
+ radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+
+ const struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ unsigned clip_dist_mask, cull_dist_mask, total_mask;
+ clip_dist_mask = outinfo->clip_dist_mask;
+ cull_dist_mask = outinfo->cull_dist_mask;
+ total_mask = clip_dist_mask | cull_dist_mask;
+ bool misc_vec_ena = outinfo->writes_pointsize ||
+ outinfo->writes_layer ||
+ outinfo->writes_viewport_index;
+
+ /* VS_EXPORT_COUNT is (number of param exports - 1); at least one. */
+ radeon_set_context_reg(cs, R_0286C4_SPI_VS_OUT_CONFIG,
+ S_0286C4_VS_EXPORT_COUNT(MAX2(1, outinfo->param_exports) - 1));
+
+ radeon_set_context_reg(cs, R_02870C_SPI_SHADER_POS_FORMAT,
+ S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) |
+ S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE) |
+ S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ?
+ V_02870C_SPI_SHADER_4COMP :
+ V_02870C_SPI_SHADER_NONE));
+
+ radeon_set_context_reg(cs, R_028818_PA_CL_VTE_CNTL,
+ S_028818_VTX_W0_FMT(1) |
+ S_028818_VPORT_X_SCALE_ENA(1) | S_028818_VPORT_X_OFFSET_ENA(1) |
+ S_028818_VPORT_Y_SCALE_ENA(1) | S_028818_VPORT_Y_OFFSET_ENA(1) |
+ S_028818_VPORT_Z_SCALE_ENA(1) | S_028818_VPORT_Z_OFFSET_ENA(1));
+
+ /* Cull distances occupy bits 8-15, clip distances bits 0-7. */
+ radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
+ S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) |
+ S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) |
+ S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) |
+ S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) |
+ S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) |
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) |
+ cull_dist_mask << 8 |
+ clip_dist_mask);
+
+ if (pipeline->device->physical_device->rad_info.chip_class <= VI)
+ radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
+ outinfo->writes_viewport_index);
+}
+
+/* Program a shader variant as the hardware ES stage (export shader feeding
+ * the GS): program address in 256-byte units plus RSRC1/RSRC2.
+ */
+static void
+radv_pipeline_generate_hw_es(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+}
+
+/* Program a shader variant as the hardware LS stage (local shader feeding
+ * the tessellation control shader).  RSRC2 is augmented with the LDS size
+ * required by the tessellation state.
+ */
+static void
+radv_pipeline_generate_hw_ls(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ const struct radv_tessellation_state *tess)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+ uint32_t rsrc2 = shader->rsrc2;
+
+ radeon_set_sh_reg_seq(cs, R_00B520_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+
+ rsrc2 |= S_00B52C_LDS_SIZE(tess->lds_size);
+ /* NOTE(review): RSRC2_LS is written again below via the RSRC1/RSRC2 pair;
+ * this extra standalone write on CIK (except Hawaii) looks like a hardware
+ * workaround mirrored from radeonsi — confirm before changing. */
+ if (pipeline->device->physical_device->rad_info.chip_class == CIK &&
+ pipeline->device->physical_device->rad_info.family != CHIP_HAWAII)
+ radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2);
+
+ radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, rsrc2);
+}
+
+/* Program a shader variant as the hardware HS stage.  On GFX9+ the HS is the
+ * merged LS+HS shader, so the program address goes into the GFX9 LS slot and
+ * RSRC2 carries the tessellation LDS size; on older chips the plain HS
+ * registers are used.
+ */
+static void
+radv_pipeline_generate_hw_hs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ struct radv_shader_variant *shader,
+ const struct radv_tessellation_state *tess)
+{
+ uint64_t va = radv_buffer_get_va(shader->bo) + shader->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ radeon_set_sh_reg_seq(cs, R_00B410_SPI_SHADER_PGM_LO_LS, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+
+ radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2 |
+ S_00B42C_LDS_SIZE(tess->lds_size));
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+ radeon_emit(cs, shader->rsrc1);
+ radeon_emit(cs, shader->rsrc2);
+ }
+}
+
+/* Emit the vertex shader, dispatching to the hardware stage it was compiled
+ * for: LS (tess), ES (geometry), or plain VS.  Does nothing when the vertex
+ * shader was merged into a later stage (no separate variant exists).
+ */
+static void
+radv_pipeline_generate_vertex_shader(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_tessellation_state *tess)
+{
+ struct radv_shader_variant *vs;
+
+ /* Skip shaders merged into HS/GS */
+ vs = pipeline->shaders[MESA_SHADER_VERTEX];
+ if (!vs)
+ return;
+
+ if (vs->info.vs.as_ls)
+ radv_pipeline_generate_hw_ls(cs, pipeline, vs, tess);
+ else if (vs->info.vs.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, vs);
+ else
+ radv_pipeline_generate_hw_vs(cs, pipeline, vs);
+}
+
+/* Emit all tessellation-related state: the TES on its hardware stage (ES or
+ * VS), the TCS as hardware HS, the VGT tess-factor and LS/HS configuration
+ * registers, and the per-stage user SGPRs describing the off-chip/LDS
+ * layouts.  No-op for pipelines without tessellation.
+ */
+static void
+radv_pipeline_generate_tess_shaders(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline,
+ const struct radv_tessellation_state *tess)
+{
+ if (!radv_pipeline_has_tess(pipeline))
+ return;
+
+ struct radv_shader_variant *tes, *tcs;
+
+ /* tcs is used unconditionally below — presumably a tess pipeline always
+ * has a TCS variant (on GFX9+ it is the merged VS+TCS); confirm. */
+ tcs = pipeline->shaders[MESA_SHADER_TESS_CTRL];
+ tes = pipeline->shaders[MESA_SHADER_TESS_EVAL];
+
+ if (tes) {
+ if (tes->info.tes.as_es)
+ radv_pipeline_generate_hw_es(cs, pipeline, tes);
+ else
+ radv_pipeline_generate_hw_vs(cs, pipeline, tes);
+ }
+
+ radv_pipeline_generate_hw_hs(cs, pipeline, tcs, tess);
+
+ radeon_set_context_reg(cs, R_028B6C_VGT_TF_PARAM,
+ tess->tf_param);
+
+ /* CIK+ uses the indexed variant of this register write. */
+ if (pipeline->device->physical_device->rad_info.chip_class >= CIK)
+ radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2,
+ tess->ls_hs_config);
+ else
+ radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG,
+ tess->ls_hs_config);
+
+ struct ac_userdata_info *loc;
+
+ /* TCS: four SGPRs describing the off-chip and output/input layouts. */
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_CTRL, AC_UD_TCS_OFFCHIP_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_CTRL];
+ assert(loc->num_sgprs == 4);
+ assert(!loc->indirect);
+ radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, 4);
+ radeon_emit(cs, tess->offchip_layout);
+ radeon_emit(cs, tess->tcs_out_offsets);
+ radeon_emit(cs, tess->tcs_out_layout |
+ tess->num_tcs_input_cp << 26);
+ radeon_emit(cs, tess->tcs_in_layout);
+ }
+
+ /* TES: one SGPR with the off-chip layout. */
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_TESS_EVAL, AC_UD_TES_OFFCHIP_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_TESS_EVAL];
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4,
+ tess->offchip_layout);
+ }
+
+ /* VS (as LS): one SGPR with the TCS input layout. */
+ loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX, AC_UD_VS_LS_TCS_IN_LAYOUT);
+ if (loc->sgpr_idx != -1) {
+ uint32_t base_reg = pipeline->user_data_0[MESA_SHADER_VERTEX];
+ assert(loc->num_sgprs == 1);
+ assert(!loc->indirect);
+
+ radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4,
+ tess->tcs_in_layout);
+ }
+}
+
+/* Emit all geometry-shader state: GS/VS ring item sizes, instance count, the
+ * GS program registers (merged ES/GS on GFX9+), the GS copy shader as the
+ * hardware VS, and the GS/VS ring stride+entries user SGPRs.  No-op for
+ * pipelines without a geometry shader.
+ */
+static void
+radv_pipeline_generate_geometry_shader(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{
+ struct radv_shader_variant *gs;
+ uint64_t va;
+
+ gs = pipeline->shaders[MESA_SHADER_GEOMETRY];
+ if (!gs)
+ return;
+
+ uint32_t gsvs_itemsize = gs->info.gs.max_gsvs_emit_size >> 2;
+
+ /* Same item size is replicated into all three GSVS ring offset slots. */
+ radeon_set_context_reg_seq(cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3);
+ radeon_emit(cs, gsvs_itemsize);
+ radeon_emit(cs, gsvs_itemsize);
+ radeon_emit(cs, gsvs_itemsize);
+
+ radeon_set_context_reg(cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
+
+ radeon_set_context_reg(cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out);
+
+ /* Only stream 0 is used; the other vertex item sizes stay zero. */
+ uint32_t gs_vert_itemsize = gs->info.gs.gsvs_vertex_size;
+ radeon_set_context_reg_seq(cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4);
+ radeon_emit(cs, gs_vert_itemsize >> 2);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+ radeon_emit(cs, 0);
+
+ uint32_t gs_num_invocations = gs->info.gs.invocations;
+ radeon_set_context_reg(cs, R_028B90_VGT_GS_INSTANCE_CNT,
+ S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
+ S_028B90_ENABLE(gs_num_invocations > 0));
+
+ radeon_set_context_reg(cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
+ pipeline->graphics.gs.vgt_esgs_ring_itemsize);
+
+ va = radv_buffer_get_va(gs->bo) + gs->bo_offset;
+
+ if (pipeline->device->physical_device->rad_info.chip_class >= GFX9) {
+ /* GFX9+: merged ES/GS — address goes in the ES slot, RSRC2 carries
+ * the GS LDS size, plus the on-chip GS control registers. */
+ radeon_set_sh_reg_seq(cs, R_00B210_SPI_SHADER_PGM_LO_ES, 2);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+
+ radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2);
+ radeon_emit(cs, gs->rsrc1);
+ radeon_emit(cs, gs->rsrc2 |
+ S_00B22C_LDS_SIZE(pipeline->graphics.gs.lds_size));
+
+ radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL, pipeline->graphics.gs.vgt_gs_onchip_cntl);
+ radeon_set_context_reg(cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, pipeline->graphics.gs.vgt_gs_max_prims_per_subgroup);
+ } else {
+ radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4);
+ radeon_emit(cs, va >> 8);
+ radeon_emit(cs, va >> 40);
+ radeon_emit(cs, gs->rsrc1);
+ radeon_emit(cs, gs->rsrc2);
+ }
+
+ /* The GS copy shader runs as the hardware VS stage. */
+ radv_pipeline_generate_hw_vs(cs, pipeline, pipeline->gs_copy_shader);
+
+ struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_GEOMETRY,
+ AC_UD_GS_VS_RING_STRIDE_ENTRIES);
+ if (loc->sgpr_idx != -1) {
+ uint32_t stride = gs->info.gs.max_gsvs_emit_size;
+ uint32_t num_entries = 64;
+ bool is_vi = pipeline->device->physical_device->rad_info.chip_class >= VI;
+
+ /* On VI+ the entry count is scaled by the stride — presumably the
+ * descriptor's NUM_RECORDS counts bytes there; confirm against the
+ * GS/VS ring descriptor setup. */
+ if (is_vi)
+ num_entries *= stride;
+
+ stride = S_008F04_STRIDE(stride);
+ radeon_set_sh_reg_seq(cs, R_00B230_SPI_SHADER_USER_DATA_GS_0 + loc->sgpr_idx * 4, 2);
+ radeon_emit(cs, stride);
+ radeon_emit(cs, num_entries);
+ }
+}
+
+/* Translate a vertex-shader export parameter offset into an
+ * SPI_PS_INPUT_CNTL_* value.  Real export slots (<= OFFSET_31) reference the
+ * parameter directly (optionally flat-shaded); anything else must be one of
+ * the DEFAULT_VAL constants, encoded via OFFSET 0x20 plus the constant index.
+ */
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+{
+ uint32_t ps_input_cntl;
+ if (offset <= AC_EXP_PARAM_OFFSET_31) {
+ ps_input_cntl = S_028644_OFFSET(offset);
+ if (flat_shade)
+ ps_input_cntl |= S_028644_FLAT_SHADE(1);
+ } else {
+ /* The input is a DEFAULT_VAL constant. */
+ assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+ offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+ offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
+ ps_input_cntl = S_028644_OFFSET(0x20) |
+ S_028644_DEFAULT_VAL(offset);
+ }
+ return ps_input_cntl;
+}
+
+static void
+radv_pipeline_generate_ps_inputs(struct radeon_winsys_cs *cs,
+ struct radv_pipeline *pipeline)
+{
+ struct radv_shader_variant *ps = pipeline->shaders[MESA_SHADER_FRAGMENT];
+ const struct ac_vs_output_info *outinfo = get_vs_output_info(pipeline);
+ uint32_t ps_input_cntl[32];
+
+ unsigned ps_offset = 0;
+
+ if (ps->info.fs.prim_id_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ ++ps_offset;
+ }
+ }
+
+ if (ps->info.fs.layer_input) {
+ unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+ if (vs_offset != AC_EXP_PARAM_UNDEFINED)
+ ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+ else
+ ps_input_cntl[ps_offset] = offset_to_ps_input(AC_EXP_PARAM_DEFAULT_VAL_0000, true);
+ ++ps_offset;
+ }
+
+ if (ps->info.fs.has_pcoord) {
+ unsigned val;
+ val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
+ ps_input_cntl[ps_offset] = val;
+ ps_offset++;
+ }
+
+ for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {