From: Jason Ekstrand Date: Fri, 9 Sep 2016 23:30:24 +0000 (-0700) Subject: i965: Add blorp support for gen4-5 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=752d7af77a52898cebf5597def4fdd38b1d6303e;p=mesa.git i965: Add blorp support for gen4-5 Due to complications with things such as URB setup on gen4-5, it's easier to keep gen4 support in blorp completely internal to i965. This makes things a bit awkward because that means there's a file in i965 that includes blorp_priv.h but it's either that or have a file in blorp that includes brw_context.h. Reviewed-by: Topi Pohjolainen --- diff --git a/src/intel/blorp/blorp.c b/src/intel/blorp/blorp.c index 7f1566f00a4..ea3b8252a2a 100644 --- a/src/intel/blorp/blorp.c +++ b/src/intel/blorp/blorp.c @@ -124,10 +124,10 @@ brw_blorp_surface_info_init(struct blorp_context *blorp, info->z_offset = 0; } - /* Sandy Bridge has a limit of a maximum of 512 layers for layered - * rendering. + /* Sandy Bridge and earlier have a limit of a maximum of 512 layers for + * layered rendering. */ - if (is_render_target && blorp->isl_dev->info->gen == 6) + if (is_render_target && blorp->isl_dev->info->gen <= 6) info->view.array_len = MIN2(info->view.array_len, 512); } diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c index 1f8ea492de9..fe24f1f4753 100644 --- a/src/intel/blorp/blorp_blit.c +++ b/src/intel/blorp/blorp_blit.c @@ -1672,6 +1672,18 @@ try_blorp_blit(struct blorp_batch *batch, coords->y.dst0, coords->y.dst1, coords->y.mirror); + + if (devinfo->gen == 4) { + /* The MinLOD and MinimumArrayElement don't work properly for cube maps. + * Convert them to a single slice on gen4. 
+ */ + if (params->dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT) + blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params->dst); + + if (params->src.surf.usage & ISL_SURF_USAGE_CUBE_BIT) + blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params->src); + } + if (devinfo->gen > 6 && params->dst.surf.msaa_layout == ISL_MSAA_LAYOUT_INTERLEAVED) { assert(params->dst.surf.samples > 1); diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c index 40a1a10b3da..fea5eb7b442 100644 --- a/src/intel/blorp/blorp_clear.c +++ b/src/intel/blorp/blorp_clear.c @@ -366,11 +366,6 @@ blorp_clear(struct blorp_batch *batch, struct blorp_params params; blorp_params_init(&params); - params.x0 = x0; - params.y0 = y0; - params.x1 = x1; - params.y1 = y1; - /* Manually apply the clear destination swizzle. This way swizzled clears * will work for swizzles which we can't normally use for rendering and it * also ensures that they work on pre-Haswell hardware which can't swizlle @@ -427,6 +422,27 @@ blorp_clear(struct blorp_batch *batch, start_layer, format, true); params.dst.view.swizzle = swizzle; + params.x0 = x0; + params.y0 = y0; + params.x1 = x1; + params.y1 = y1; + + /* The MinLOD and MinimumArrayElement don't work properly for cube maps. + * Convert them to a single slice on gen4. + */ + if (batch->blorp->isl_dev->info->gen == 4 && + (params.dst.surf.usage & ISL_SURF_USAGE_CUBE_BIT)) { + blorp_surf_convert_to_single_slice(batch->blorp->isl_dev, &params.dst); + + if (params.dst.tile_x_sa || params.dst.tile_y_sa) { + /* This is gen4 so there is no multisampling and sa == px. 
*/ + params.x0 += params.dst.tile_x_sa; + params.y0 += params.dst.tile_y_sa; + params.x1 += params.dst.tile_x_sa; + params.y1 += params.dst.tile_y_sa; + } + } + params.num_samples = params.dst.surf.samples; /* We may be restricted on the number of layers we can bind at any one diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h index 058dedc1981..8b9b8d27755 100644 --- a/src/intel/blorp/blorp_genX_exec.h +++ b/src/intel/blorp/blorp_genX_exec.h @@ -76,6 +76,10 @@ static void blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size, unsigned sf_entry_size); +static void +blorp_emit_pipeline(struct blorp_batch *batch, + const struct blorp_params *params); + /***** BEGIN blorp_exec implementation ******/ #include "genxml/gen_macros.h" @@ -272,6 +276,9 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch, vb[0].BufferAccessType = VERTEXDATA; vb[0].EndAddress = vb[0].BufferStartingAddress; vb[0].EndAddress.offset += size - 1; +#elif GEN_GEN == 4 + vb[0].BufferAccessType = VERTEXDATA; + vb[0].MaxIndex = 2; #endif blorp_emit_input_varying_data(batch, params, @@ -290,6 +297,9 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch, vb[1].BufferAccessType = INSTANCEDATA; vb[1].EndAddress = vb[1].BufferStartingAddress; vb[1].EndAddress.offset += size - 1; +#elif GEN_GEN == 4 + vb[1].BufferAccessType = INSTANCEDATA; + vb[1].MaxIndex = 0; #endif const unsigned num_dwords = 1 + GENX(VERTEX_BUFFER_STATE_length) * 2; @@ -309,7 +319,8 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, { const unsigned num_varyings = params->wm_prog_data ? 
params->wm_prog_data->num_varying_inputs : 0; - const unsigned num_elements = 2 + num_varyings; + bool need_ndc = batch->blorp->compiler->devinfo->gen <= 5; + const unsigned num_elements = 2 + need_ndc + num_varyings; struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements]; memset(ve, 0, num_elements * sizeof(*ve)); @@ -382,8 +393,31 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, #endif .Component2Control = VFCOMP_STORE_SRC, .Component3Control = VFCOMP_STORE_SRC, +#if GEN_GEN <= 5 + .DestinationElementOffset = slot * 4, +#endif + }; + slot++; + +#if GEN_GEN <= 5 + /* On Iron Lake and earlier, a native device coordinates version of the + * position goes right after the normal VUE header and before position. + * Since w == 1 for all of our coordinates, this is just a copy of the + * position. + */ + ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { + .VertexBufferIndex = 0, + .Valid = true, + .SourceElementFormat = ISL_FORMAT_R32G32B32_FLOAT, + .SourceElementOffset = 0, + .Component0Control = VFCOMP_STORE_SRC, + .Component1Control = VFCOMP_STORE_SRC, + .Component2Control = VFCOMP_STORE_SRC, + .Component3Control = VFCOMP_STORE_1_FP, + .DestinationElementOffset = slot * 4, }; slot++; +#endif ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) { .VertexBufferIndex = 0, @@ -394,6 +428,9 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, .Component1Control = VFCOMP_STORE_SRC, .Component2Control = VFCOMP_STORE_SRC, .Component3Control = VFCOMP_STORE_1_FP, +#if GEN_GEN <= 5 + .DestinationElementOffset = slot * 4, +#endif }; slot++; @@ -407,6 +444,9 @@ blorp_emit_vertex_elements(struct blorp_batch *batch, .Component1Control = VFCOMP_STORE_SRC, .Component2Control = VFCOMP_STORE_SRC, .Component3Control = VFCOMP_STORE_SRC, +#if GEN_GEN <= 5 + .DestinationElementOffset = slot * 4, +#endif }; slot++; } @@ -1162,6 +1202,7 @@ static void blorp_emit_surface_state(struct blorp_batch *batch, const struct brw_blorp_surface_info *surface, void *state, uint32_t state_offset, + const 
bool color_write_disables[4], bool is_render_target) { const struct isl_device *isl_dev = batch->blorp->isl_dev; @@ -1178,13 +1219,26 @@ blorp_emit_surface_state(struct blorp_batch *batch, if (aux_usage == ISL_AUX_USAGE_HIZ) aux_usage = ISL_AUX_USAGE_NONE; + isl_channel_mask_t write_disable_mask = 0; + if (is_render_target && GEN_GEN <= 5) { + if (color_write_disables[0]) + write_disable_mask |= ISL_CHANNEL_RED_BIT; + if (color_write_disables[1]) + write_disable_mask |= ISL_CHANNEL_GREEN_BIT; + if (color_write_disables[2]) + write_disable_mask |= ISL_CHANNEL_BLUE_BIT; + if (color_write_disables[3]) + write_disable_mask |= ISL_CHANNEL_ALPHA_BIT; + } + const uint32_t mocs = is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex; isl_surf_fill_state(batch->blorp->isl_dev, state, .surf = &surf, .view = &surface->view, .aux_surf = &surface->aux_surf, .aux_usage = aux_usage, - .mocs = mocs, .clear_color = surface->clear_color); + .mocs = mocs, .clear_color = surface->clear_color, + .write_disables = write_disable_mask); blorp_surface_reloc(batch, state_offset + isl_dev->ss.addr_offset, surface->addr, 0); @@ -1257,7 +1311,7 @@ blorp_emit_surface_states(struct blorp_batch *batch, blorp_emit_surface_state(batch, &params->dst, surface_maps[BLORP_RENDERBUFFER_BT_INDEX], surface_offsets[BLORP_RENDERBUFFER_BT_INDEX], - true); + params->color_write_disable, true); } else { assert(params->depth.enabled || params->stencil.enabled); const struct brw_blorp_surface_info *surface = @@ -1269,7 +1323,8 @@ blorp_emit_surface_states(struct blorp_batch *batch, if (params->src.enabled) { blorp_emit_surface_state(batch, &params->src, surface_maps[BLORP_TEXTURE_BT_INDEX], - surface_offsets[BLORP_TEXTURE_BT_INDEX], false); + surface_offsets[BLORP_TEXTURE_BT_INDEX], + NULL, false); } } diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index 37338167c98..cc030c2adeb 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ 
b/src/mesa/drivers/dri/i965/Makefile.sources @@ -126,12 +126,15 @@ i965_FILES = \ libdrm_macros.h i965_gen4_FILES = \ + genX_blorp_exec.c \ genX_state_upload.c i965_gen45_FILES = \ + genX_blorp_exec.c \ genX_state_upload.c i965_gen5_FILES = \ + genX_blorp_exec.c \ genX_state_upload.c i965_gen6_FILES = \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c b/src/mesa/drivers/dri/i965/brw_blorp.c index 64aa4c90883..7404606b9b6 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.c +++ b/src/mesa/drivers/dri/i965/brw_blorp.c @@ -71,6 +71,16 @@ brw_blorp_init(struct brw_context *brw) brw->blorp.compiler = brw->screen->compiler; switch (brw->gen) { + case 4: + if (brw->is_g4x) { + brw->blorp.exec = gen45_blorp_exec; + } else { + brw->blorp.exec = gen4_blorp_exec; + } + break; + case 5: + brw->blorp.exec = gen5_blorp_exec; + break; case 6: brw->blorp.mocs.tex = 0; brw->blorp.mocs.rb = 0; diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h b/src/mesa/drivers/dri/i965/brw_blorp.h index ee4bf3bf541..8743d963abc 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp.h +++ b/src/mesa/drivers/dri/i965/brw_blorp.h @@ -72,6 +72,12 @@ void intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt, unsigned int level, unsigned int layer, enum blorp_hiz_op op); +void gen4_blorp_exec(struct blorp_batch *batch, + const struct blorp_params *params); +void gen45_blorp_exec(struct blorp_batch *batch, + const struct blorp_params *params); +void gen5_blorp_exec(struct blorp_batch *batch, + const struct blorp_params *params); void gen6_blorp_exec(struct blorp_batch *batch, const struct blorp_params *params); void gen7_blorp_exec(struct blorp_batch *batch, diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index d3ed871618c..c815a0454d7 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -1118,8 +1118,7 @@ brwCreateContext(gl_api api, brw_init_surface_formats(brw); - if (brw->gen >= 6) - 
brw_blorp_init(brw); + brw_blorp_init(brw); brw->urb.size = devinfo->urb.size; diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h new file mode 100644 index 00000000000..183c0da0af3 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h @@ -0,0 +1,197 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +static inline struct blorp_address +dynamic_state_address(struct blorp_batch *batch, uint32_t offset) +{ + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + + return (struct blorp_address) { + .buffer = brw->batch.bo, + .offset = offset, + .write_domain = 0, + .read_domains = I915_GEM_DOMAIN_INSTRUCTION, + }; +} + +static inline struct blorp_address +instruction_state_address(struct blorp_batch *batch, uint32_t offset) +{ + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + + return (struct blorp_address) { + .buffer = brw->cache.bo, + .offset = offset, + .write_domain = 0, + .read_domains = I915_GEM_DOMAIN_INSTRUCTION, + }; +} + +static struct blorp_address +blorp_emit_vs_state(struct blorp_batch *batch, + const struct blorp_params *params) +{ + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + + uint32_t offset; + blorp_emit_dynamic(batch, GENX(VS_STATE), vs, 64, &offset) { + vs.Enable = false; + vs.URBEntryAllocationSize = brw->urb.vsize - 1; +#if GEN_GEN == 5 + vs.NumberofURBEntries = brw->urb.nr_vs_entries >> 2; +#else + vs.NumberofURBEntries = brw->urb.nr_vs_entries; +#endif + } + + return dynamic_state_address(batch, offset); +} + +static struct blorp_address +blorp_emit_sf_state(struct blorp_batch *batch, + const struct blorp_params *params) +{ + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + const struct brw_sf_prog_data *prog_data = params->sf_prog_data; + + uint32_t offset; + blorp_emit_dynamic(batch, GENX(SF_STATE), sf, 64, &offset) { +#if GEN_GEN == 4 + sf.KernelStartPointer = + instruction_state_address(batch, params->sf_prog_kernel); +#else + sf.KernelStartPointer = params->sf_prog_kernel; +#endif + sf.GRFRegisterCount = DIV_ROUND_UP(prog_data->total_grf, 16) - 1; + sf.VertexURBEntryReadLength = 
prog_data->urb_read_length; + sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET; + sf.DispatchGRFStartRegisterForURBData = 3; + + sf.URBEntryAllocationSize = brw->urb.sfsize - 1; + sf.NumberofURBEntries = brw->urb.nr_sf_entries; + +#if GEN_GEN == 5 + sf.MaximumNumberofThreads = MIN2(48, brw->urb.nr_sf_entries) - 1; +#else + sf.MaximumNumberofThreads = MIN2(24, brw->urb.nr_sf_entries) - 1; +#endif + + sf.ViewportTransformEnable = false; + + sf.CullMode = CULLMODE_NONE; + } + + return dynamic_state_address(batch, offset); +} + +static struct blorp_address +blorp_emit_wm_state(struct blorp_batch *batch, + const struct blorp_params *params) +{ + const struct brw_wm_prog_data *prog_data = params->wm_prog_data; + + uint32_t offset; + blorp_emit_dynamic(batch, GENX(WM_STATE), wm, 64, &offset) { + if (params->src.enabled) { + /* Iron Lake can't do sampler prefetch */ + wm.SamplerCount = (GEN_GEN != 5); + wm.BindingTableEntryCount = 2; + uint32_t sampler = blorp_emit_sampler_state(batch, params); + wm.SamplerStatePointer = dynamic_state_address(batch, sampler); + } + + if (prog_data) { + wm.DispatchGRFStartRegisterForURBData = + prog_data->base.dispatch_grf_start_reg; + wm.SetupURBEntryReadLength = prog_data->num_varying_inputs * 2; + wm.SetupURBEntryReadOffset = 0; + + wm.DepthCoefficientURBReadOffset = 1; + wm.PixelShaderKillPixel = prog_data->uses_kill; + wm.ThreadDispatchEnable = true; + wm.EarlyDepthTestEnable = true; + + wm._8PixelDispatchEnable = prog_data->dispatch_8; + wm._16PixelDispatchEnable = prog_data->dispatch_16; + +#if GEN_GEN == 4 + wm.KernelStartPointer = + instruction_state_address(batch, params->wm_prog_kernel); + wm.GRFRegisterCount = prog_data->reg_blocks_0; +#else + wm.KernelStartPointer0 = params->wm_prog_kernel; + wm.GRFRegisterCount0 = prog_data->reg_blocks_0; + wm.KernelStartPointer2 = + params->wm_prog_kernel + prog_data->prog_offset_2; + wm.GRFRegisterCount2 = prog_data->reg_blocks_2; +#endif + } + + wm.MaximumNumberofThreads = + 
batch->blorp->compiler->devinfo->max_wm_threads - 1; + } + + return dynamic_state_address(batch, offset); +} + +static struct blorp_address +blorp_emit_color_calc_state(struct blorp_batch *batch, + const struct blorp_params *params) +{ + uint32_t cc_viewport = blorp_emit_cc_viewport(batch, params); + + uint32_t offset; + blorp_emit_dynamic(batch, GENX(COLOR_CALC_STATE), cc, 64, &offset) { + cc.CCViewportStatePointer = dynamic_state_address(batch, cc_viewport); + } + + return dynamic_state_address(batch, offset); +} + +static void +blorp_emit_pipeline(struct blorp_batch *batch, + const struct blorp_params *params) +{ + assert(batch->blorp->driver_ctx == batch->driver_batch); + struct brw_context *brw = batch->driver_batch; + + emit_urb_config(batch, params); + + blorp_emit(batch, GENX(3DSTATE_PIPELINED_POINTERS), pp) { + pp.PointertoVSState = blorp_emit_vs_state(batch, params); + pp.GSEnable = false; + pp.ClipEnable = false; + pp.PointertoSFState = blorp_emit_sf_state(batch, params); + pp.PointertoWMState = blorp_emit_wm_state(batch, params); + pp.PointertoColorCalcState = blorp_emit_color_calc_state(batch, params); + } + + brw_upload_urb_fence(brw); + + blorp_emit(batch, GENX(CS_URB_STATE), curb); + blorp_emit(batch, GENX(CONSTANT_BUFFER), curb); +} diff --git a/src/mesa/drivers/dri/i965/genX_blorp_exec.c b/src/mesa/drivers/dri/i965/genX_blorp_exec.c index 72ac274d2f5..3451d7187eb 100644 --- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c +++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c @@ -32,6 +32,10 @@ #include "blorp/blorp_genX_exec.h" +#if GEN_GEN <= 5 +#include "gen4_blorp_exec.h" +#endif + #include "brw_blorp.h" static void * @@ -169,8 +173,11 @@ blorp_emit_urb_config(struct blorp_batch *batch, brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; gen7_upload_urb(brw, vs_entry_size, false, false); -#else +#elif GEN_GEN == 6 gen6_upload_urb(brw, vs_entry_size, false, 0); +#else + /* We calculate it now and emit later. 
*/ + brw_calculate_urb_fence(brw, 0, vs_entry_size, sf_entry_size); #endif } @@ -215,7 +222,9 @@ retry: gen7_l3_state.emit(brw); #endif +#if GEN_GEN >= 6 brw_emit_depth_stall_flushes(brw); +#endif #if GEN_GEN == 8 gen8_write_pma_stall_bits(brw, 0);