X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Ffreedreno%2Fvulkan%2Ftu_pipeline.c;h=3362945728928880de0c449b5874f918ea10a130;hb=6d513eb0db25a272da65822f35907456b544f172;hp=c3a467ebbfca94c9a244e6084d7ae843b0c34472;hpb=e72201c7873ea22dadf8d1775f97400a435a8b9a;p=mesa.git diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index c3a467ebbfc..33629457289 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -325,6 +325,8 @@ tu_dynamic_state_bit(VkDynamicState state) return TU_DYNAMIC_STENCIL_WRITE_MASK; case VK_DYNAMIC_STATE_STENCIL_REFERENCE: return TU_DYNAMIC_STENCIL_REFERENCE; + case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: + return TU_DYNAMIC_SAMPLE_LOCATIONS; default: unreachable("invalid dynamic state"); return 0; @@ -646,7 +648,7 @@ tu6_emit_gs_config(struct tu_cs *cs, struct tu_shader *shader, const struct ir3_shader_variant *gs) { bool has_gs = gs->type != MESA_SHADER_NONE; - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); tu_cs_emit(cs, 0); tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_CONFIG, 2); @@ -730,7 +732,8 @@ tu6_emit_cs_config(struct tu_cs *cs, const struct tu_shader *shader, static void tu6_emit_vs_system_values(struct tu_cs *cs, const struct ir3_shader_variant *vs, - const struct ir3_shader_variant *gs) + const struct ir3_shader_variant *gs, + bool primid_passthru) { const uint32_t vertexid_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); @@ -753,7 +756,7 @@ tu6_emit_vs_system_values(struct tu_cs *cs, tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */ tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) | 0xfc00); /* VFD_CONTROL_5 */ - tu_cs_emit(cs, 0x00000000); /* VFD_CONTROL_6 */ + tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */ } /* Add any missing varyings needed for stream-out. Otherwise varyings not @@ -825,6 +828,10 @@ tu6_setup_streamout(const struct ir3_shader_variant *v, unsigned k = out->register_index; unsigned idx; + /* Skip it, if there's an unused reg in the middle of outputs. */ + if (v->outputs[k].regid == INVALID_REG) + continue; + tf->ncomp[out->output_buffer] += out->num_components; /* linkage map sorted by order frag shader wants things, so @@ -891,6 +898,8 @@ tu6_emit_link_map(struct tu_cs *cs, int size = DIV_ROUND_UP(num_loc, 4); size = (MIN2(size + base, consumer->constlen) - base) * 4; + if (size <= 0) + return; tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, SB6_GS_SHADER, 0, size, patch_locs); @@ -921,23 +930,22 @@ tu6_emit_vpc(struct tu_cs *cs, bool has_gs = gs->type != MESA_SHADER_NONE; const struct ir3_shader_variant *last_shader = has_gs ? gs : vs; struct ir3_shader_linkage linkage = { 0 }; - ir3_link_shaders(&linkage, last_shader, fs); + ir3_link_shaders(&linkage, last_shader, fs, true); if (last_shader->shader->stream_output.num_outputs) tu6_link_streamout(&linkage, last_shader); - BITSET_DECLARE(vpc_var_enables, 128) = { 0 }; - for (uint32_t i = 0; i < linkage.cnt; i++) { - const uint32_t comp_count = util_last_bit(linkage.var[i].compmask); - for (uint32_t j = 0; j < comp_count; j++) - BITSET_SET(vpc_var_enables, linkage.var[i].loc + j); - } + /* We do this after linking shaders in order to know whether PrimID + * passthrough needs to be enabled. + */ + bool primid_passthru = linkage.primid_loc != 0xff; + tu6_emit_vs_system_values(cs, vs, gs, primid_passthru); tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); - tu_cs_emit(cs, ~vpc_var_enables[0]); - tu_cs_emit(cs, ~vpc_var_enables[1]); - tu_cs_emit(cs, ~vpc_var_enables[2]); - tu_cs_emit(cs, ~vpc_var_enables[3]); + tu_cs_emit(cs, ~linkage.varmask[0]); + tu_cs_emit(cs, ~linkage.varmask[1]); + tu_cs_emit(cs, ~linkage.varmask[2]); + tu_cs_emit(cs, ~linkage.varmask[3]); /* a6xx finds position/pointsize at the end */ const uint32_t position_regid = @@ -990,10 +998,14 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_VPC_DST_REG(0), sp_vpc_dst_count); tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); + tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMID_CNTL, 1); + tu_cs_emit(cs, COND(primid_passthru, A6XX_PC_PRIMID_CNTL_PRIMID_PASSTHRU)); + tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs->total_in) | (fs->total_in > 0 ? A6XX_VPC_CNTL_0_VARYING : 0) | - 0xff00ff00); + A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) | + A6XX_VPC_CNTL_0_UNKLOC(0xff)); tu_cs_emit_pkt4(cs, REG_A6XX_VPC_PACK, 1); tu_cs_emit(cs, A6XX_VPC_PACK_POSITIONLOC(position_loc) | @@ -1068,7 +1080,7 @@ tu6_emit_vpc(struct tu_cs *cs, tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9B07, 1); tu_cs_emit(cs, 0); - tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); tu_cs_emit(cs, vs->shader->output_size); } @@ -1546,7 +1558,6 @@ tu6_emit_program(struct tu_cs *cs, tu6_emit_gs_config(cs, builder->shaders[MESA_SHADER_GEOMETRY], gs); tu6_emit_fs_config(cs, builder->shaders[MESA_SHADER_FRAGMENT], fs); - tu6_emit_vs_system_values(cs, vs, gs); tu6_emit_vpc(cs, vs, gs, fs, binning_pass, tf); tu6_emit_vpc_varying_modes(cs, fs, binning_pass); tu6_emit_fs_inputs(cs, fs); @@ -1733,6 +1744,47 @@ tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissor) A6XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(max.y - 1)); } +void +tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc) +{ + if (!samp_loc) { + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1); + tu_cs_emit(cs, 0); + return; + } + + assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount); + assert(samp_loc->sampleLocationGridSize.width == 1); + assert(samp_loc->sampleLocationGridSize.height == 1); + + uint32_t sample_config = + A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE; + uint32_t sample_locations = 0; + for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) { + sample_locations |= + (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) | + A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8; + } + + tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); + + tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2); + tu_cs_emit(cs, sample_config); + tu_cs_emit(cs, sample_locations); +} + static void tu6_emit_gras_unknowns(struct tu_cs *cs) { @@ -2064,7 +2116,7 @@ tu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder) for (gl_shader_stage stage = MESA_SHADER_STAGES - 1; stage > MESA_SHADER_NONE; stage--) { const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; - if (!stage_info) + if (!stage_info && stage != MESA_SHADER_FRAGMENT) continue; struct tu_shader *shader = @@ -2405,7 +2457,7 @@ tu_pipeline_builder_parse_multisample_and_color_blend( : &dummy_blend_info; struct tu_cs blend_cs; - tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 9, &blend_cs); + tu_cs_begin_sub_stream(&pipeline->cs, MAX_RTS * 3 + 18, &blend_cs); uint32_t blend_enable_mask; tu6_emit_rb_mrt_controls(&blend_cs, blend_info, @@ -2415,6 +2467,17 @@ tu_pipeline_builder_parse_multisample_and_color_blend( if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_BLEND_CONSTANTS)) tu6_emit_blend_constants(&blend_cs, blend_info->blendConstants); + if (!(pipeline->dynamic_state.mask & TU_DYNAMIC_SAMPLE_LOCATIONS)) { + const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = + vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); + const VkSampleLocationsInfoEXT *samp_loc = NULL; + + if (sample_locations && sample_locations->sampleLocationsEnable) + samp_loc = &sample_locations->sampleLocationsInfo; + + tu6_emit_sample_locations(&blend_cs, samp_loc); + } + tu6_emit_blend_control(&blend_cs, blend_enable_mask, msaa_info); pipeline->blend.state_ib = tu_cs_end_sub_stream(&pipeline->cs, &blend_cs);