X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2FgenX_pipeline.c;h=9c08bc203310e394f80c3d4fdd0abe6a0b0603ae;hb=1deb7967c807799efc4b6057bac33b87340b7608;hp=6068a4310868e930549fd704a1665dd0941e4483;hpb=3dbd7737d44345b1ae3629d50b009307534a84a6;p=mesa.git diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 6068a431086..9c08bc20331 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -28,6 +28,7 @@ #include "common/gen_l3_config.h" #include "common/gen_sample_positions.h" +#include "vk_util.h" #include "vk_format_info.h" static uint32_t @@ -155,7 +156,7 @@ emit_vertex_input(struct anv_pipeline *pipeline, anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VF_INSTANCING), vfi) { vfi.InstancingEnable = pipeline->instancing_enable[desc->binding]; vfi.VertexElementIndex = slot; - /* Our implementation of VK_KHX_multiview uses instancing to draw + /* Our implementation of VK_KHR_multiview uses instancing to draw * the different views. If the client asks for instancing, we * need to use the Instance Data Step Rate to ensure that we * repeat the client's per-instance data once for each view. @@ -281,7 +282,7 @@ genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch, } } -static inline void +static void emit_urb_setup(struct anv_pipeline *pipeline) { unsigned entry_size[4]; @@ -378,8 +379,8 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) /* We have to subtract two slots to accout for the URB entry output * read offset in the VS and GS stages. */ - assert(slot >= 2); const int source_attr = slot - 2 * urb_entry_read_offset; + assert(source_attr >= 0 && source_attr < 32); max_source_attr = MAX2(max_source_attr, source_attr); swiz.Attribute[input_index].SourceAttribute = source_attr; } @@ -552,6 +553,7 @@ emit_ms_state(struct anv_pipeline *pipeline, anv_batch_emit(&pipeline->batch, GENX(3DSTATE_MULTISAMPLE), ms) { ms.NumberofMultisamples = log2_samples; + ms.PixelLocation = CENTER; #if GEN_GEN >= 8 /* The PRM says that this bit is valid only for DX9: * @@ -559,9 +561,7 @@ emit_ms_state(struct anv_pipeline *pipeline, * should not have any effect by setting or not setting this bit. */ ms.PixelPositionOffsetEnable = false; - ms.PixelLocation = CENTER; #else - ms.PixelLocation = PIXLOC_CENTER; switch (samples) { case 1: @@ -1063,7 +1063,8 @@ emit_3dstate_clip(struct anv_pipeline *pipeline, } #else clip.NonPerspectiveBarycentricEnable = wm_prog_data ? - (wm_prog_data->barycentric_interp_modes & 0x38) != 0 : 0; + (wm_prog_data->barycentric_interp_modes & + BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) != 0 : 0; #endif } } @@ -1077,19 +1078,25 @@ emit_3dstate_streamout(struct anv_pipeline *pipeline, } } -static inline uint32_t +static uint32_t get_sampler_count(const struct anv_shader_bin *bin) { - return DIV_ROUND_UP(bin->bind_map.sampler_count, 4); + uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4); + + /* We can potentially have way more than 32 samplers and that's ok. + * However, the 3DSTATE_XS packets only have 3 bits to specify how + * many to pre-fetch and all values above 4 are marked reserved. + */ + return MIN2(count_by_4, 4); } -static inline uint32_t +static uint32_t get_binding_table_entry_count(const struct anv_shader_bin *bin) { return DIV_ROUND_UP(bin->bind_map.surface_count, 32); } -static inline struct anv_address +static struct anv_address get_scratch_address(struct anv_pipeline *pipeline, gl_shader_stage stage, const struct anv_shader_bin *bin) @@ -1102,28 +1109,12 @@ get_scratch_address(struct anv_pipeline *pipeline, }; } -static inline uint32_t +static uint32_t get_scratch_space(const struct anv_shader_bin *bin) { return ffs(bin->prog_data->total_scratch / 2048); } -static inline uint32_t -get_urb_output_offset() -{ - /* Skip the VUE header and position slots */ - return 1; -} - -static inline uint32_t -get_urb_output_length(const struct anv_shader_bin *bin) -{ - const struct brw_vue_prog_data *prog_data = - (const struct brw_vue_prog_data *)bin->prog_data; - - return (prog_data->vue_map.num_slots + 1) / 2 - get_urb_output_offset(); -} - static void emit_3dstate_vs(struct anv_pipeline *pipeline) { @@ -1135,7 +1126,7 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX)); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_VS), vs) { - vs.FunctionEnable = true; + vs.Enable = true; vs.StatisticsEnable = true; vs.KernelStartPointer = vs_bin->kernel.offset; #if GEN_GEN >= 8 @@ -1144,7 +1135,9 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) #endif assert(!vs_prog_data->base.base.use_alt_mode); +#if GEN_GEN < 11 vs.SingleVertexDispatch = false; +#endif vs.VectorMaskEnable = false; vs.SamplerCount = get_sampler_count(vs_bin); vs.BindingTableEntryCount = get_binding_table_entry_count(vs_bin); @@ -1160,9 +1153,6 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) vs_prog_data->base.base.dispatch_grf_start_reg; #if GEN_GEN >= 8 - vs.VertexURBEntryOutputReadOffset = get_urb_output_offset(); - vs.VertexURBEntryOutputLength = get_urb_output_length(vs_bin); - vs.UserClipDistanceClipTestEnableBitmask = vs_prog_data->base.clip_distance_mask; vs.UserClipDistanceCullTestEnableBitmask = @@ -1176,7 +1166,8 @@ emit_3dstate_vs(struct anv_pipeline *pipeline) } static void -emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline) +emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline, + const VkPipelineTessellationStateCreateInfo *tess_info) { if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs); @@ -1195,7 +1186,7 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline) const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_HS), hs) { - hs.FunctionEnable = true; + hs.Enable = true; hs.StatisticsEnable = true; hs.KernelStartPointer = tcs_bin->kernel.offset; @@ -1215,9 +1206,29 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline) get_scratch_address(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); } + const VkPipelineTessellationDomainOriginStateCreateInfoKHR *domain_origin_state = + tess_info ? vk_find_struct_const(tess_info, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO_KHR) : NULL; + + VkTessellationDomainOriginKHR uv_origin = + domain_origin_state ? domain_origin_state->domainOrigin : + VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT_KHR; + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_TE), te) { te.Partitioning = tes_prog_data->partitioning; - te.OutputTopology = tes_prog_data->output_topology; + + if (uv_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT_KHR) { + te.OutputTopology = tes_prog_data->output_topology; + } else { + /* When the origin is upper-left, we have to flip the winding order */ + if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) { + te.OutputTopology = OUTPUT_TRI_CW; + } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) { + te.OutputTopology = OUTPUT_TRI_CCW; + } else { + te.OutputTopology = tes_prog_data->output_topology; + } + } + te.TEDomain = tes_prog_data->domain; te.TEEnable = true; te.MaximumTessellationFactorOdd = 63.0; @@ -1225,7 +1236,7 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline) } anv_batch_emit(&pipeline->batch, GENX(3DSTATE_DS), ds) { - ds.FunctionEnable = true; + ds.Enable = true; ds.StatisticsEnable = true; ds.KernelStartPointer = tes_bin->kernel.offset; @@ -1242,14 +1253,15 @@ emit_3dstate_hs_te_ds(struct anv_pipeline *pipeline) tes_prog_data->base.base.dispatch_grf_start_reg; #if GEN_GEN >= 8 - ds.VertexURBEntryOutputReadOffset = 1; - ds.VertexURBEntryOutputLength = - (tes_prog_data->base.vue_map.num_slots + 1) / 2 - 1; - +#if GEN_GEN < 11 ds.DispatchMode = tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ? DISPATCH_MODE_SIMD8_SINGLE_PATCH : DISPATCH_MODE_SIMD4X2; +#else + assert(tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8); + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; +#endif ds.UserClipDistanceClipTestEnableBitmask = tes_prog_data->base.clip_distance_mask; @@ -1278,7 +1290,7 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_GS), gs) { - gs.FunctionEnable = true; + gs.Enable = true; gs.StatisticsEnable = true; gs.KernelStartPointer = gs_bin->kernel.offset; gs.DispatchMode = gs_prog_data->base.dispatch_mode; @@ -1303,11 +1315,7 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) gs.ControlDataFormat = gs_prog_data->control_data_format; gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1; -#if GEN_GEN >= 8 || GEN_IS_HASWELL gs.ReorderMode = TRAILING; -#else - gs.ReorderEnable = true; -#endif #if GEN_GEN >= 8 gs.ExpectedVertexCount = gs_prog_data->vertices_in; @@ -1322,9 +1330,6 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) gs_prog_data->base.base.dispatch_grf_start_reg; #if GEN_GEN >= 8 - gs.VertexURBEntryOutputReadOffset = get_urb_output_offset(); - gs.VertexURBEntryOutputLength = get_urb_output_length(gs_bin); - gs.UserClipDistanceClipTestEnableBitmask = gs_prog_data->base.clip_distance_mask; gs.UserClipDistanceCullTestEnableBitmask = @@ -1337,8 +1342,9 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) } } -static inline bool -has_color_buffer_write_enabled(const struct anv_pipeline *pipeline) +static bool +has_color_buffer_write_enabled(const struct anv_pipeline *pipeline, + const VkPipelineColorBlendStateCreateInfo *blend) { const struct anv_shader_bin *shader_bin = pipeline->shaders[MESA_SHADER_FRAGMENT]; @@ -1347,10 +1353,15 @@ has_color_buffer_write_enabled(const struct anv_pipeline *pipeline) const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map; for (int i = 0; i < bind_map->surface_count; i++) { - if (bind_map->surface_to_descriptor[i].set != - ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) + struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i]; + + if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS) continue; - if (bind_map->surface_to_descriptor[i].index != UINT8_MAX) + + if (binding->index == UINT32_MAX) + continue; + + if (blend->pAttachments[binding->index].colorWriteMask != 0) return true; } @@ -1359,6 +1370,7 @@ has_color_buffer_write_enabled(const struct anv_pipeline *pipeline) static void emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass, + const VkPipelineColorBlendStateCreateInfo *blend, const VkPipelineMultisampleStateCreateInfo *multisample) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -1403,7 +1415,7 @@ emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass, if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF || wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel || - has_color_buffer_write_enabled(pipeline)) + has_color_buffer_write_enabled(pipeline, blend)) wm.ThreadDispatchEnable = true; if (samples > 1) { @@ -1422,7 +1434,7 @@ emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass, } } -static inline bool +UNUSED static bool is_dual_src_blend_factor(VkBlendFactor factor) { return factor == VK_BLEND_FACTOR_SRC1_COLOR || @@ -1488,7 +1500,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline, ps.VectorMaskEnable = true; ps.SamplerCount = get_sampler_count(fs_bin); ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin); - ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; + ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 || + wm_prog_data->base.ubo_ranges[0].length; ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE: POSOFFSET_NONE; #if GEN_GEN < 8 @@ -1527,7 +1540,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline, #if GEN_GEN >= 8 static void emit_3dstate_ps_extra(struct anv_pipeline *pipeline, - struct anv_subpass *subpass) + struct anv_subpass *subpass, + const VkPipelineColorBlendStateCreateInfo *blend) { const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); @@ -1582,7 +1596,7 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline, * attachments, we need to force-enable here. */ if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) && - !has_color_buffer_write_enabled(pipeline)) + !has_color_buffer_write_enabled(pipeline, blend)) ps.PixelShaderHasUAV = true; #if GEN_GEN >= 9 @@ -1704,18 +1718,19 @@ genX(graphics_pipeline_create)( * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS * Stall" bit set. */ - if (!brw->is_haswell && !brw->is_baytrail) + if (!device->info.is_haswell && !device->info.is_baytrail) gen7_emit_vs_workaround_flush(brw); #endif emit_3dstate_vs(pipeline); - emit_3dstate_hs_te_ds(pipeline); + emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState); emit_3dstate_gs(pipeline); emit_3dstate_sbe(pipeline); - emit_3dstate_wm(pipeline, subpass, pCreateInfo->pMultisampleState); + emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState, + pCreateInfo->pMultisampleState); emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState); #if GEN_GEN >= 8 - emit_3dstate_ps_extra(pipeline, subpass); + emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState); emit_3dstate_vf_topology(pipeline); #endif emit_3dstate_vf_statistics(pipeline); @@ -1748,7 +1763,6 @@ compute_pipeline_create( return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); pipeline->blend_state.map = NULL; @@ -1813,7 +1827,9 @@ compute_pipeline_create( vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1; vfe.NumberofURBEntries = GEN_GEN <= 7 ? 0 : 2; +#if GEN_GEN < 11 vfe.ResetGatewayTimer = true; +#endif #if GEN_GEN <= 8 vfe.BypassGatewayControl = true; #endif