{
ANV_FROM_HANDLE(anv_device, device, _device);
ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass);
- const struct anv_physical_device *physical_device =
- &device->instance->physicalDevice;
- const struct gen_device_info *devinfo = &physical_device->info;
struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
struct anv_pipeline *pipeline;
VkResult result;
emit_3dstate_vs(pipeline);
emit_3dstate_gs(pipeline);
emit_3dstate_sbe(pipeline);
+ emit_3dstate_ps(pipeline);
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_WM), wm) {
wm.EarlyDepthStencilControl = EDSC_NORMAL;
wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
}
-
- /* Even if no fragments are ever dispatched, the hardware hangs if we
- * don't at least set the maximum number of threads.
- */
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
- ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
- }
} else {
- const struct anv_shader_bin *fs_bin =
- pipeline->shaders[MESA_SHADER_FRAGMENT];
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
if (wm_prog_data->urb_setup[VARYING_SLOT_BFC0] != -1 ||
if (wm_prog_data->urb_setup[VARYING_SLOT_PRIMITIVE_ID] != -1)
anv_finishme("primitive_id needs sbe swizzling setup");
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
- ps.KernelStartPointer0 = fs_bin->kernel.offset;
- ps.KernelStartPointer1 = 0;
- ps.KernelStartPointer2 = fs_bin->kernel.offset +
- wm_prog_data->prog_offset_2;
-
- ps.ScratchSpaceBasePointer = (struct anv_address) {
- .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
- MESA_SHADER_FRAGMENT,
- wm_prog_data->base.total_scratch),
- .offset = 0,
- };
- ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
-
- ps.SamplerCount = get_sampler_count(fs_bin);
- ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin);
-
- ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
- ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
- ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
- ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
-
- ps.RenderTargetFastClearEnable = false;
- ps.DualSourceBlendEnable = wm_prog_data->dual_src_blend;
- ps.RenderTargetResolveEnable = false;
-
- ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
- POSOFFSET_SAMPLE : POSOFFSET_NONE;
-
- ps._32PixelDispatchEnable = false;
- ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
- ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
-
- ps.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg,
- ps.DispatchGRFStartRegisterForConstantSetupData1 = 0,
- ps.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
-
- /* Haswell requires the sample mask to be set in this packet as well as
- * in 3DSTATE_SAMPLE_MASK; the values should match. */
- /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
-#if GEN_IS_HASWELL
- ps.SampleMask = 0xff;
-#endif
- }
-
uint32_t samples = pCreateInfo->pMultisampleState ?
pCreateInfo->pMultisampleState->rasterizationSamples : 1;
emit_3dstate_gs(pipeline);
emit_3dstate_vs(pipeline);
emit_3dstate_sbe(pipeline);
+ emit_3dstate_ps(pipeline);
- const int num_thread_bias = GEN_GEN == 8 ? 2 : 1;
if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps);
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), extra) {
extra.PixelShaderValid = false;
}
} else {
- const struct anv_shader_bin *fs_bin =
- pipeline->shaders[MESA_SHADER_FRAGMENT];
-
- anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
- ps.KernelStartPointer0 = fs_bin->kernel.offset;
- ps.KernelStartPointer1 = 0;
- ps.KernelStartPointer2 = fs_bin->kernel.offset +
- wm_prog_data->prog_offset_2;
- ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
- ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
- ps._32PixelDispatchEnable = false;
- ps.SingleProgramFlow = false;
- ps.VectorMaskEnable = true;
- ps.SamplerCount = get_sampler_count(fs_bin);
- ps.BindingTableEntryCount = get_binding_table_entry_count(fs_bin);
- ps.PushConstantEnable = wm_prog_data->base.nr_params > 0;
- ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
- POSOFFSET_SAMPLE: POSOFFSET_NONE;
-
- ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias;
-
- ps.ScratchSpaceBasePointer = (struct anv_address) {
- .bo = anv_scratch_pool_alloc(device, &device->scratch_pool,
- MESA_SHADER_FRAGMENT,
- wm_prog_data->base.total_scratch),
- .offset = 0,
- };
- ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base);
-
- ps.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
- ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
- ps.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
- }
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS_EXTRA), ps) {
ps.PixelShaderValid = true;
#endif
}
-static inline uint32_t
-scratch_space(const struct brw_stage_prog_data *prog_data)
-{
- return ffs(prog_data->total_scratch / 2048);
-}
-
static const uint32_t vk_to_gen_cullmode[] = {
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
}
}
+/**
+ * Emit a 3DSTATE_PS packet for \p pipeline into pipeline->batch.
+ *
+ * If the pipeline has no fragment stage, a packet is still emitted; the only
+ * field set explicitly in that case is MaximumNumberofThreads, and only when
+ * GEN_GEN == 7.  Otherwise the packet is filled in from the fragment shader
+ * binary and the brw_wm_prog_data returned by get_wm_prog_data().
+ */
+static void
+emit_3dstate_ps(struct anv_pipeline *pipeline)
+{
+   /* Only read in the GEN_GEN < 8 code paths, hence MAYBE_UNUSED. */
+   MAYBE_UNUSED const struct gen_device_info *devinfo = &pipeline->device->info;
+
+   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
+      anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+#if GEN_GEN == 7
+         /* gen7 hardware hangs unless the maximum number of threads is
+          * programmed, even when no fragments are ever dispatched.
+          */
+         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
+#endif
+      }
+      return;
+   }
+
+   const struct anv_shader_bin *frag_bin =
+      pipeline->shaders[MESA_SHADER_FRAGMENT];
+   const struct brw_wm_prog_data *prog = get_wm_prog_data(pipeline);
+
+   anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
+      /* Dispatch widths: 8- and 16-pixel follow the compiled program,
+       * 32-pixel dispatch is never enabled here.
+       */
+      ps._32PixelDispatchEnable = false;
+      ps._16PixelDispatchEnable = prog->dispatch_16;
+      ps._8PixelDispatchEnable = prog->dispatch_8;
+
+      /* Kernel entry points; slot 1 is left at zero. */
+      ps.KernelStartPointer0 = frag_bin->kernel.offset;
+      ps.KernelStartPointer1 = 0;
+      ps.KernelStartPointer2 = frag_bin->kernel.offset + prog->prog_offset_2;
+
+      /* Matching start registers for the constant/setup data. */
+      ps.DispatchGRFStartRegisterForConstantSetupData0 =
+         prog->base.dispatch_grf_start_reg;
+      ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
+      ps.DispatchGRFStartRegisterForConstantSetupData2 =
+         prog->dispatch_grf_start_reg_2;
+
+      ps.VectorMaskEnable = true;
+      ps.SingleProgramFlow = false;
+      ps.PushConstantEnable = prog->base.nr_params > 0;
+      if (prog->uses_pos_offset)
+         ps.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
+      else
+         ps.PositionXYOffsetSelect = POSOFFSET_NONE;
+
+      ps.SamplerCount = get_sampler_count(frag_bin);
+      ps.BindingTableEntryCount = get_binding_table_entry_count(frag_bin);
+
+#if GEN_GEN < 8
+      /* These fields are only programmed in 3DSTATE_PS before gen8. */
+      ps.DualSourceBlendEnable = prog->dual_src_blend;
+      ps.oMaskPresenttoRenderTarget = prog->uses_omask;
+      ps.AttributeEnable = prog->num_varying_inputs > 0;
+#endif
+
+#if GEN_IS_HASWELL
+      /* On Haswell the sample mask has to be programmed here in addition to
+       * 3DSTATE_SAMPLE_MASK, and the two values should agree.
+       */
+      ps.SampleMask = 0xff;
+#endif
+
+#if GEN_GEN >= 8
+      /* Per-PSD limit: 64 - 2 on gen8, 64 - 1 on gen9 and later. */
+      ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1);
+#else
+      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
+#endif
+
+      /* Scratch configuration comes from the shared per-stage helpers. */
+      ps.PerThreadScratchSpace = get_scratch_space(frag_bin);
+      ps.ScratchSpaceBasePointer =
+         get_scratch_address(pipeline, MESA_SHADER_FRAGMENT, frag_bin);
+   }
+}
+
#endif /* GENX_PIPELINE_UTIL_H */