From 23ad9982460fe563f8e5d94eb38f3ca5b4dd1a36 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sat, 12 Nov 2016 09:42:58 -0800 Subject: [PATCH] anv/pipeline: Unify 3DSTATE_PS emission Reviewed-by: Kristian H. Kristensen Reviewed-by: Timothy Arceri --- src/intel/vulkan/gen7_pipeline.c | 60 +-------------------- src/intel/vulkan/gen8_pipeline.c | 38 +------------- src/intel/vulkan/genX_pipeline_util.h | 76 ++++++++++++++++++++++++--- 3 files changed, 72 insertions(+), 102 deletions(-) diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 556b2a57cae..0c8baf6e743 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -44,9 +44,6 @@ genX(graphics_pipeline_create)( { ANV_FROM_HANDLE(anv_device, device, _device); ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); - const struct anv_physical_device *physical_device = - &device->instance->physicalDevice; - const struct gen_device_info *devinfo = &physical_device->info; struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; struct anv_pipeline *pipeline; VkResult result; @@ -107,6 +104,7 @@ genX(graphics_pipeline_create)( emit_3dstate_vs(pipeline); emit_3dstate_gs(pipeline); emit_3dstate_sbe(pipeline); + emit_3dstate_ps(pipeline); if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) { @@ -117,16 +115,7 @@ genX(graphics_pipeline_create)( wm.EarlyDepthStencilControl = EDSC_NORMAL; wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT; } - - /* Even if no fragments are ever dispatched, the hardware hangs if we - * don't at least set the maximum number of threads. 
- */ - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { - ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; - } } else { - const struct anv_shader_bin *fs_bin = - pipeline->shaders[MESA_SHADER_FRAGMENT]; const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 || @@ -135,53 +124,6 @@ genX(graphics_pipeline_create)( if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1) anv_finishme("primitive_id needs sbe swizzling setup"); - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { - ps.KernelStartPointer0 = fs_bin->kernel.offset; - ps.KernelStartPointer1 = 0; - ps.KernelStartPointer2 = fs_bin->kernel.offset + - wm_prog_data->prog_offset_2; - - ps.ScratchSpaceBasePointer = (struct anv_address) { - .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, - MESA_SHADER_FRAGMENT, - wm_prog_data->base.total_scratch), - .offset = 0, - }; - ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base); - - ps.SamplerCount = get_sampler_count(fs_bin); - ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin); - - ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; - ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; - ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; - ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; - - ps.RenderTargetFastClearEnable = false; - ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend; - ps.RenderTargetResolveEnable = false; - - ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
- POSOFFSET_SAMPLE : POSOFFSET_NONE; - - ps._32PixelDispatchEnable = false; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - wm_prog_data->base.dispatch_grf_start_reg, - ps.DispatchGRFStartRegisterForConstantSetupData1 = 0, - ps.DispatchGRFStartRegisterForConstantSetupData2 = - wm_prog_data->dispatch_grf_start_reg_2; - - /* Haswell requires the sample mask to be set in this packet as well as - * in 3DSTATE_SAMPLE_MASK; the values should match. */ - /* _NEW_BUFFERS, _NEW_MULTISAMPLE */ -#if GEN_IS_HASWELL - ps.SampleMask = 0xff; -#endif - } - uint32_t samples = pCreateInfo->pMultisampleState ? pCreateInfo->pMultisampleState->rasterizationSamples : 1; diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index f2499dc0f53..56eb0324b93 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -112,49 +112,13 @@ genX(graphics_pipeline_create)( emit_3dstate_gs(pipeline); emit_3dstate_vs(pipeline); emit_3dstate_sbe(pipeline); + emit_3dstate_ps(pipeline); - const int num_thread_bias = GEN_GEN == 8 ? 
2 : 1; if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps); anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) { extra.PixelShaderValid = false; } } else { - const struct anv_shader_bin *fs_bin = - pipeline->shaders[MESA_SHADER_FRAGMENT]; - - anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { - ps.KernelStartPointer0 = fs_bin->kernel.offset; - ps.KernelStartPointer1 = 0; - ps.KernelStartPointer2 = fs_bin->kernel.offset + - wm_prog_data->prog_offset_2; - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = false; - ps.SingleProgramFlow = false; - ps.VectorMaskEnable = true; - ps.SamplerCount = get_sampler_count(fs_bin); - ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin); - ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; - ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? - POSOFFSET_SAMPLE: POSOFFSET_NONE; - - ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias; - - ps.ScratchSpaceBasePointer = (struct anv_address) { - .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, - MESA_SHADER_FRAGMENT, - wm_prog_data->base.total_scratch), - .offset = 0, - }; - ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base); - - ps.DispatchGRFStartRegisterForConstantSetupData0 = - wm_prog_data->base.dispatch_grf_start_reg; - ps.DispatchGRFStartRegisterForConstantSetupData1 = 0; - ps.DispatchGRFStartRegisterForConstantSetupData2 = - wm_prog_data->dispatch_grf_start_reg_2; - } anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) { ps.PixelShaderValid = true; diff --git a/src/intel/vulkan/genX_pipeline_util.h b/src/intel/vulkan/genX_pipeline_util.h index 3906529713f..957b580a657 100644 --- a/src/intel/vulkan/genX_pipeline_util.h +++ b/src/intel/vulkan/genX_pipeline_util.h @@ -455,12 +455,6 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline) #endif } 
-static inline uint32_t -scratch_space(const struct brw_stage_prog_data *prog_data) -{ - return ffs(prog_data->total_scratch / 2048); -} - static const uint32_t vk_to_gen_cullmode[] = { [VK_CULL_MODE_NONE] = CULLMODE_NONE, [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT, @@ -1134,4 +1128,74 @@ emit_3dstate_gs(struct anv_pipeline *pipeline) } } +static void +emit_3dstate_ps(struct anv_pipeline *pipeline) +{ /* Emits exactly one 3DSTATE_PS packet into pipeline->batch; which fields are written depends on whether the pipeline has a fragment stage and on GEN_GEN. */ + MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info; /* devinfo is only read inside the GEN_GEN == 7 and pre-gen8 #if branches below, hence MAYBE_UNUSED */ + const struct anv_shader_bin *fs_bin = + pipeline->shaders[MESA_SHADER_FRAGMENT]; /* only dereferenced after the has-stage check below */ + + if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) { + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { +#if GEN_GEN == 7 + /* Even if no fragments are ever dispatched, gen7 hardware hangs if + * we don't at least set the maximum number of threads. + */ + ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; +#endif + } + return; /* no fragment stage: no other field is written by this function */ + } + + const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); + + anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) { + ps.KernelStartPointer0 = fs_bin->kernel.offset; + ps.KernelStartPointer1 = 0; + ps.KernelStartPointer2 = fs_bin->kernel.offset + + wm_prog_data->prog_offset_2; + ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; + ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; + ps._32PixelDispatchEnable = false; /* 32-pixel dispatch is always left disabled here */ + + ps.SingleProgramFlow = false; + ps.VectorMaskEnable = true; + ps.SamplerCount = get_sampler_count(fs_bin); + ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin); + ps.PushConstantEnable = wm_prog_data->base.nr_params > 0; + ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ? 
+ POSOFFSET_SAMPLE: POSOFFSET_NONE; +#if GEN_GEN < 8 + ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0; + ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; + ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend; +#endif + +#if GEN_IS_HASWELL + /* Haswell requires the sample mask to be set in this packet as well + * as in 3DSTATE_SAMPLE_MASK; the values should match. + */ + ps.SampleMask = 0xff; +#endif + +#if GEN_GEN >= 9 + ps.MaximumNumberofThreadsPerPSD = 64 - 1; +#elif GEN_GEN >= 8 + ps.MaximumNumberofThreadsPerPSD = 64 - 2; +#else + ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1; /* pre-gen8: limit comes from devinfo instead of a fixed per-PSD count */ +#endif + + ps.DispatchGRFStartRegisterForConstantSetupData0 = + wm_prog_data->base.dispatch_grf_start_reg; + ps.DispatchGRFStartRegisterForConstantSetupData1 = 0; + ps.DispatchGRFStartRegisterForConstantSetupData2 = + wm_prog_data->dispatch_grf_start_reg_2; + + ps.PerThreadScratchSpace = get_scratch_space(fs_bin); /* NOTE(review): get_scratch_space() and get_scratch_address() are not defined in this patch; confirm their contracts in genX_pipeline_util.h */ + ps.ScratchSpaceBasePointer = + get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, fs_bin); + } +} + #endif /* GENX_PIPELINE_UTIL_H */ -- 2.30.2