From 16a9fcbbb688537ac2e8f952e683d63eb7c688e8 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 9 Aug 2016 19:27:58 -0700 Subject: [PATCH] i965/blorp: Use genxml for gen8-9 state setup Signed-off-by: Jason Ekstrand Reviewed-by: Topi Pohjolainen --- src/mesa/drivers/dri/i965/Makefile.am | 10 +- src/mesa/drivers/dri/i965/Makefile.sources | 7 +- src/mesa/drivers/dri/i965/blorp.c | 4 +- src/mesa/drivers/dri/i965/blorp_priv.h | 3 + src/mesa/drivers/dri/i965/gen8_blorp.c | 578 -------------------- src/mesa/drivers/dri/i965/genX_blorp_exec.c | 157 +++++- 6 files changed, 172 insertions(+), 587 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/gen8_blorp.c diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index ad4e365f6fd..d6bafed3000 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -51,7 +51,9 @@ brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compil I965_PERGEN_LIBS = \ libi965_gen6.la \ libi965_gen7.la \ - libi965_gen75.la + libi965_gen75.la \ + libi965_gen8.la \ + libi965_gen9.la libi965_gen6_la_SOURCES = $(i965_gen6_FILES) libi965_gen6_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=60 @@ -62,6 +64,12 @@ libi965_gen7_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=70 libi965_gen75_la_SOURCES = $(i965_gen75_FILES) libi965_gen75_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=75 +libi965_gen8_la_SOURCES = $(i965_gen8_FILES) +libi965_gen8_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=80 + +libi965_gen9_la_SOURCES = $(i965_gen9_FILES) +libi965_gen9_la_CFLAGS = $(AM_CFLAGS) -DGEN_VERSIONx10=90 + noinst_LTLIBRARIES = \ libi965_dri.la \ libi965_compiler.la \ diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 130f2b9df81..51c3205e5d2 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -207,7 +207,6 @@ i965_FILES = \ gen7_wm_state.c \ gen7_wm_surface_state.c 
\ gen8_blend_state.c \ - gen8_blorp.c \ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ @@ -268,3 +267,9 @@ i965_gen7_FILES = \ i965_gen75_FILES = \ genX_blorp_exec.c + +i965_gen8_FILES = \ + genX_blorp_exec.c + +i965_gen9_FILES = \ + genX_blorp_exec.c diff --git a/src/mesa/drivers/dri/i965/blorp.c b/src/mesa/drivers/dri/i965/blorp.c index 87cf2c96d9e..310061566d9 100644 --- a/src/mesa/drivers/dri/i965/blorp.c +++ b/src/mesa/drivers/dri/i965/blorp.c @@ -321,9 +321,11 @@ retry: gen7_blorp_exec(brw, params); break; case 8: - case 9: gen8_blorp_exec(brw, params); break; + case 9: + gen9_blorp_exec(brw, params); + break; default: /* BLORP is not supported before Gen6. */ unreachable("not reached"); diff --git a/src/mesa/drivers/dri/i965/blorp_priv.h b/src/mesa/drivers/dri/i965/blorp_priv.h index ce6aaa7a80e..3ca1c223416 100644 --- a/src/mesa/drivers/dri/i965/blorp_priv.h +++ b/src/mesa/drivers/dri/i965/blorp_priv.h @@ -203,6 +203,9 @@ gen75_blorp_exec(struct brw_context *brw, void gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params); +void +gen9_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params); + struct brw_blorp_blit_prog_key { /* Number of samples per pixel that have been configured in the surface diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c deleted file mode 100644 index 2223b2329c5..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_blorp.c +++ /dev/null @@ -1,578 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following 
conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_fbo.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "blorp_priv.h" - -static uint32_t -gen8_blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t blend_state_offset; - - assume(params->num_draw_buffers); - - const unsigned size = 4 + 8 * params->num_draw_buffers; - uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE, - size, 64, - &blend_state_offset); - memset(blend, 0, size); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - if (params->color_write_disable[0]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_RED; - if (params->color_write_disable[1]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_GREEN; - if (params->color_write_disable[2]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_BLUE; - if (params->color_write_disable[3]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_ALPHA; - - blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE | - GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE | - GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT; - } - - return blend_state_offset; -} - -/* Hardware seems to try to fetch the constants even though the corresponding - * stage gets 
disabled. Therefore make sure the settings for the constant - * buffer are valid. - */ -static void -gen8_blorp_disable_constant_state(struct brw_context *brw, - unsigned opcode) -{ - BEGIN_BATCH(11); - OUT_BATCH(opcode << 16 | (11 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_VS - * - * Disable vertex shader. - */ -static void -gen8_blorp_emit_vs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_HS - * - * Disable the hull shader. - */ -static void -gen8_blorp_emit_hs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_DS - * - * Disable the domain shader. - */ -static void -gen8_blorp_emit_ds_disable(struct brw_context *brw) -{ - const int ds_pkt_len = brw->gen >= 9 ? 11 : 9; - BEGIN_BATCH(ds_pkt_len); - OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); - for (int i = 0; i < ds_pkt_len - 1; i++) - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_GS - * - * Disable the geometry shader. - */ -static void -gen8_blorp_emit_gs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_STREAMOUT - * - * Disable streamout. 
- */ -static void -gen8_blorp_emit_streamout_disable(struct brw_context *brw) -{ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_raster_state(struct brw_context *brw) -{ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2)); - OUT_BATCH(GEN8_RASTER_CULL_NONE); - OUT_BATCH_F(0); - OUT_BATCH_F(0); - OUT_BATCH_F(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_sbe_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; - const unsigned urb_read_length = - brw_blorp_get_urb_length(params->wm_prog_data); - - /* 3DSTATE_SBE */ - { - const unsigned sbe_cmd_length = brw->gen == 8 ? 4 : 6; - BEGIN_BATCH(sbe_cmd_length); - OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2)); - - /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the - * vertex data coming from vertex fetcher is taken as unmodified - * (i.e., passed through). Vertex shader state is disabled and vertex - * fetcher builds complete vertex entries including VUE header. - * This is for unknown reason really needed to be disabled when more - * than one vec4 worth of vertex attributes are needed. - */ - OUT_BATCH(num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | - urb_read_length << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << - GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | - GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | - GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET); - OUT_BATCH(0); - OUT_BATCH(params->wm_prog_data->flat_inputs); - if (sbe_cmd_length >= 6) { - /* Fragment coordinates are always enabled. 
*/ - uint32_t dw4 = (GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1)); - - for (unsigned i = 0; i < num_varyings; ++i) { - dw4 |= (GEN9_SBE_ACTIVE_COMPONENT_XYZW << ((i + 1) << 1)); - } - - OUT_BATCH(dw4); - OUT_BATCH(0); - } - ADVANCE_BATCH(); - } - - { - BEGIN_BATCH(11); - OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2)); - - /* Output DWords 1 through 8: */ - for (int i = 0; i < 8; i++) { - OUT_BATCH(0); - } - - OUT_BATCH(0); /* wrapshortest enables 0-7 */ - OUT_BATCH(0); /* wrapshortest enables 8-15 */ - ADVANCE_BATCH(); - } -} - -static void -gen8_blorp_emit_sf_config(struct brw_context *brw) -{ - /* See gen6_blorp_emit_sf_config() */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE); - ADVANCE_BATCH(); -} - -/** - * Disable thread dispatch (dw5.19) and enable the HiZ op. - */ -static void -gen8_blorp_emit_wm_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/** - * 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the - * valid range for this field is [0x3, 0x2f]. - * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. 
- */ -static void -gen8_blorp_emit_ps_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw3, dw5, dw6, dw7, ksp0, ksp2; - - dw3 = dw5 = dw6 = dw7 = ksp0 = ksp2 = 0; - dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; - - if (params->src.bo) { - dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ - dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */ - } else { - dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */ - } - - dw7 |= prog_data->first_curbe_grf_0 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; - dw7 |= prog_data->first_curbe_grf_2 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2; - - if (params->wm_prog_data->dispatch_8) - dw6 |= GEN7_PS_8_DISPATCH_ENABLE; - if (params->wm_prog_data->dispatch_16) - dw6 |= GEN7_PS_16_DISPATCH_ENABLE; - - ksp0 = params->wm_prog_kernel; - ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2; - - /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; - * it implicitly scales for different GT levels (which have some # of PSDs). - * - * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 
- */ - if (brw->gen >= 9) - dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT; - else - dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT; - - dw6 |= GEN7_PS_POSOFFSET_NONE; - dw6 |= params->fast_clear_op; - - BEGIN_BATCH(12); - OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); - OUT_BATCH(ksp0); - OUT_BATCH(0); - OUT_BATCH(dw3); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(dw6); - OUT_BATCH(dw7); - OUT_BATCH(0); /* kernel 1 pointer */ - OUT_BATCH(0); - OUT_BATCH(ksp2); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_ps_blend(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2)); - OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_ps_extra(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw1 = 0; - - dw1 |= GEN8_PSX_PIXEL_SHADER_VALID; - - if (params->src.bo) - dw1 |= GEN8_PSX_KILL_ENABLE; - - if (params->wm_prog_data->num_varying_inputs) - dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE; - - if (params->dst.surf.samples > 1 && prog_data && - prog_data->persample_msaa_dispatch) - dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); - OUT_BATCH(dw1); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_depth_disable(struct brw_context *brw) -{ - /* Skip repeated NULL depth/stencil emits (think 2D rendering). 
*/ - if (brw->no_depth_or_stencil) - return; - - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(8); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2)); - OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(5); - OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(5); - OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_topology(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2)); - OUT_BATCH(_3DPRIM_RECTLIST); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_instancing_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned num_elems = 2 + num_varyings; - - for (unsigned i = 0; i < num_elems; ++i) { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(i); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -static void -gen8_blorp_emit_vf_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned pkt_len = brw->gen >= 9 ? 
4 : 3; - - BEGIN_BATCH(pkt_len); - OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - if (pkt_len > 3) { - OUT_BATCH(0); - } - ADVANCE_BATCH(); -} - -static uint32_t -gen8_blorp_emit_surface_states(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t wm_surf_offset_renderbuffer; - uint32_t wm_surf_offset_texture = 0; - - wm_surf_offset_renderbuffer = - brw_blorp_emit_surface_state(brw, &params->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - true /* is_render_target */); - if (params->src.bo) { - wm_surf_offset_texture = - brw_blorp_emit_surface_state(brw, &params->src, - I915_GEM_DOMAIN_SAMPLER, 0, - false /* is_render_target */); - } - - return gen6_blorp_emit_binding_table(brw, - wm_surf_offset_renderbuffer, - wm_surf_offset_texture); -} - -/** - * \copydoc gen6_blorp_exec() - */ -void -gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params) -{ - uint32_t wm_bind_bo_offset = 0; - - brw_upload_state_base_address(brw); - - gen7_l3_state.emit(brw); - - gen7_blorp_emit_urb_config(brw, params); - - const uint32_t cc_blend_state_offset = - gen8_blorp_emit_blend_state(brw, params); - gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); - - const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw); - gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); - - gen8_blorp_emit_depth_stencil_state(brw, params); - - gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_VS); - gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_HS); - gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_DS); - gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_GS); - gen8_blorp_disable_constant_state(brw, _3DSTATE_CONSTANT_PS); - - wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params); - - gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); - - if (params->src.bo) { - const uint32_t sampler_offset = - gen6_blorp_emit_sampler_state(brw, 
BRW_MAPFILTER_LINEAR, 0, true); - gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); - } - - gen8_emit_3dstate_multisample(brw, params->dst.surf.samples); - gen6_emit_3dstate_sample_mask(brw, - params->dst.surf.samples > 1 ? - (1 << params->dst.surf.samples) - 1 : 1); - - gen8_disable_stages.emit(brw); - gen8_blorp_emit_vs_disable(brw); - gen8_blorp_emit_hs_disable(brw); - gen7_blorp_emit_te_disable(brw); - gen8_blorp_emit_ds_disable(brw); - gen8_blorp_emit_gs_disable(brw); - - gen8_blorp_emit_streamout_disable(brw); - gen6_blorp_emit_clip_disable(brw); - gen8_blorp_emit_raster_state(brw); - gen8_blorp_emit_sbe_state(brw, params); - gen8_blorp_emit_sf_config(brw); - - gen8_blorp_emit_ps_blend(brw); - gen8_blorp_emit_ps_extra(brw, params); - - gen8_blorp_emit_ps_config(brw, params); - - gen8_blorp_emit_wm_state(brw); - - gen7_blorp_emit_cc_viewport(brw); - - gen8_blorp_emit_depth_disable(brw); - gen7_blorp_emit_clear_params(brw, params); - gen6_blorp_emit_drawing_rectangle(brw, params); - gen8_blorp_emit_vf_topology(brw); - gen8_blorp_emit_vf_sys_gen_vals_state(brw); - gen6_blorp_emit_vertices(brw, params); - gen8_blorp_emit_vf_instancing_state(brw, params); - gen8_blorp_emit_vf_state(brw); - gen7_blorp_emit_primitive(brw, params); - - if (brw->gen < 9) - gen8_write_pma_stall_bits(brw, 0); -} diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 1eba713c195..eb2dc6c51fd 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -252,8 +252,12 @@ blorp_emit_vertex_buffers(struct brw_context *brw, unsigned num_buffers = 1; -#if GEN_GEN == 7 - uint32_t mocs = 1 /* GEN7_MOCS_L3 */; +#if GEN_GEN == 9 + uint32_t mocs = (2 << 1); /* SKL_MOCS_WB */ +#elif GEN_GEN == 8 + uint32_t mocs = 0x78; /* BDW_MOCS_WB */ +#elif GEN_GEN == 7 + uint32_t mocs = 1; /* GEN7_MOCS_L3 */ #else uint32_t mocs = 0; #endif @@ -266,22 +270,30 @@ blorp_emit_vertex_buffers(struct 
brw_context *brw, #if GEN_GEN >= 7 vb[0].AddressModifyEnable = true; #endif +#if GEN_GEN >= 8 + vb[0].BufferSize = size; +#else vb[0].BufferAccessType = VERTEXDATA; vb[0].EndAddress = vb[0].BufferStartingAddress; vb[0].EndAddress.offset += size - 1; +#endif if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) { blorp_emit_input_varying_data(brw, params, &vb[1].BufferStartingAddress, &size); vb[1].VertexBufferIndex = 1; vb[1].BufferPitch = 0; - vb[1].BufferAccessType = INSTANCEDATA; vb[1].VertexBufferMOCS = mocs; #if GEN_GEN >= 7 vb[1].AddressModifyEnable = true; #endif +#if GEN_GEN >= 8 + vb[1].BufferSize = size; +#else + vb[1].BufferAccessType = INSTANCEDATA; vb[1].EndAddress = vb[1].BufferStartingAddress; - vb[1].EndAddress.offset += size; + vb[1].EndAddress.offset += size - 1; +#endif num_buffers++; } @@ -387,6 +399,21 @@ blorp_emit_vertex_elements(struct brw_context *brw, GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]); dw += GENX(VERTEX_ELEMENT_STATE_length); } + +#if GEN_GEN >= 8 + blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs); + + for (unsigned i = 0; i < num_elements; i++) { + blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) { + vf.VertexElementIndex = i; + vf.InstancingEnable = false; + } + } + + blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) { + topo.PrimitiveTopologyType = _3DPRIM_RECTLIST; + } +#endif } static void @@ -414,7 +441,29 @@ blorp_emit_sf_config(struct brw_context *brw, * (rendering rectangle (RECTLIST) objects. 
*/ -#if GEN_GEN >= 7 +#if GEN_GEN >= 8 + + blorp_emit(brw, GENX(3DSTATE_SF), sf); + + blorp_emit(brw, GENX(3DSTATE_RASTER), raster) { + raster.CullMode = CULLMODE_NONE; + } + + blorp_emit(brw, GENX(3DSTATE_SBE), sbe) { + sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; + sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs; + sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data); + sbe.ForceVertexURBEntryReadLength = true; + sbe.ForceVertexURBEntryReadOffset = true; + sbe.ConstantInterpolationEnable = prog_data->flat_inputs; + +#if GEN_GEN >= 9 + for (unsigned i = 0; i < 32; i++) + sbe.AttributeActiveComponentFormat[i] = ACF_XYZW; +#endif + } + +#elif GEN_GEN >= 7 blorp_emit(brw, GENX(3DSTATE_SF), sf) { sf.FrontFaceFillMode = FILL_MODE_SOLID; @@ -478,7 +527,73 @@ blorp_emit_ps_config(struct brw_context *brw, * configure the WM state whether or not there is a WM program. */ -#if GEN_GEN >= 7 +#if GEN_GEN >= 8 + + blorp_emit(brw, GENX(3DSTATE_WM), wm); + + blorp_emit(brw, GENX(3DSTATE_PS), ps) { + if (params->src.bo) { + ps.SamplerCount = 1; /* Up to 4 samplers */ + ps.BindingTableEntryCount = 2; + } else { + ps.BindingTableEntryCount = 1; + } + + ps.DispatchGRFStartRegisterForConstantSetupData0 = + prog_data->first_curbe_grf_0; + ps.DispatchGRFStartRegisterForConstantSetupData2 = + prog_data->first_curbe_grf_2; + + ps._8PixelDispatchEnable = prog_data->dispatch_8; + ps._16PixelDispatchEnable = prog_data->dispatch_16; + + ps.KernelStartPointer0 = params->wm_prog_kernel; + ps.KernelStartPointer2 = + params->wm_prog_kernel + prog_data->ksp_offset_2; + + /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; + * it implicitly scales for different GT levels (which have some # of + * PSDs). + * + * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 
+ */ + if (GEN_GEN >= 9) + ps.MaximumNumberofThreadsPerPSD = 64 - 1; + else + ps.MaximumNumberofThreadsPerPSD = 64 - 2; + + switch (params->fast_clear_op) { +#if GEN_GEN >= 9 + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ + ps.RenderTargetResolveType = RESOLVE_PARTIAL; + break; + case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */ + ps.RenderTargetResolveType = RESOLVE_FULL; + break; +#else + case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */ + ps.RenderTargetResolveEnable = true; + break; +#endif + case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */ + ps.RenderTargetFastClearEnable = true; + break; + } + } + + blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) { + psx.PixelShaderValid = true; + + if (params->src.bo) + psx.PixelShaderKillsPixel = true; + + psx.AttributeEnable = prog_data->num_varying_inputs > 0; + + if (prog_data && prog_data->persample_msaa_dispatch) + psx.PixelShaderIsPerSample = true; + } + +#elif GEN_GEN >= 7 blorp_emit(brw, GENX(3DSTATE_WM), wm) { switch (params->hiz_op) { @@ -713,6 +828,15 @@ blorp_emit_blend_state(struct brw_context *brw, #if GEN_GEN >= 7 blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) { sp.BlendStatePointer = offset; +#if GEN_GEN >= 8 + sp.BlendStatePointerValid = true; +#endif + } +#endif + +#if GEN_GEN >= 8 + blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) { + ps_blend.HasWriteableRT = true; } #endif @@ -731,6 +855,9 @@ blorp_emit_color_calc_state(struct brw_context *brw, #if GEN_GEN >= 7 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) { sp.ColorCalcStatePointer = offset; +#if GEN_GEN >= 8 + sp.ColorCalcStatePointerValid = true; +#endif } #endif @@ -741,6 +868,14 @@ static uint32_t blorp_emit_depth_stencil_state(struct brw_context *brw, const struct brw_blorp_params *params) { +#if GEN_GEN >= 8 + + /* On gen8+, DEPTH_STENCIL state is simply an instruction */ + blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds); + return 0; + +#else /* GEN_GEN <= 7 */ + /* See the following 
sections of the Sandy Bridge PRM, Volume 1, Part2: * - 7.5.3.1 Depth Buffer Clear * - 7.5.3.2 Depth Buffer Resolve @@ -768,6 +903,8 @@ blorp_emit_depth_stencil_state(struct brw_context *brw, #endif return offset; + +#endif /* GEN_GEN */ } static void @@ -898,6 +1035,10 @@ genX(blorp_exec)(struct brw_context *brw, brw_upload_state_base_address(brw); +#if GEN_GEN >= 8 + gen7_l3_state.emit(brw); +#endif + blorp_emit_vertex_buffers(brw, params); blorp_emit_vertex_elements(brw, params); @@ -952,7 +1093,11 @@ genX(blorp_exec)(struct brw_context *brw, if (params->src.bo) blorp_emit_sampler_state(brw, params); +#if GEN_GEN >= 8 + gen8_emit_3dstate_multisample(brw, params->dst.surf.samples); +#else gen6_emit_3dstate_multisample(brw, params->dst.surf.samples); +#endif blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) { mask.SampleMask = (1 << params->dst.surf.samples) - 1; -- 2.30.2