From: Jason Ekstrand Date: Fri, 22 Apr 2016 21:48:36 +0000 (-0700) Subject: i965/blorp: Convert state setup to C X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4040fff81d6bafdf71fae86fc4cb17bf007194af;p=mesa.git i965/blorp: Convert state setup to C Reviewed-by: Topi Pohjolainen Reviewed-by: Matt Turner --- diff --git a/configure.ac b/configure.ac index 35568278c1e..5f75c60bfc4 100644 --- a/configure.ac +++ b/configure.ac @@ -2657,6 +2657,9 @@ AC_OUTPUT # Fix up dependencies in *.Plo files, where we changed the extension of a # source file $SED -i -e 's/brw_blorp.cpp/brw_blorp.c/' src/mesa/drivers/dri/i965/.deps/brw_blorp.Plo +$SED -i -e 's/gen6_blorp.cpp/gen6_blorp.c/' src/mesa/drivers/dri/i965/.deps/gen6_blorp.Plo +$SED -i -e 's/gen7_blorp.cpp/gen7_blorp.c/' src/mesa/drivers/dri/i965/.deps/gen7_blorp.Plo +$SED -i -e 's/gen8_blorp.cpp/gen8_blorp.c/' src/mesa/drivers/dri/i965/.deps/gen8_blorp.Plo dnl diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index fe70d3a1881..441d727bce5 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -177,7 +177,7 @@ i965_FILES = \ brw_wm.h \ brw_wm_state.c \ brw_wm_surface_state.c \ - gen6_blorp.cpp \ + gen6_blorp.c \ gen6_cc.c \ gen6_clip_state.c \ gen6_constant_state.c \ @@ -195,7 +195,7 @@ i965_FILES = \ gen6_viewport_state.c \ gen6_vs_state.c \ gen6_wm_state.c \ - gen7_blorp.cpp \ + gen7_blorp.c \ gen7_cs_state.c \ gen7_ds_state.c \ gen7_gs_state.c \ @@ -211,7 +211,7 @@ i965_FILES = \ gen7_wm_state.c \ gen7_wm_surface_state.c \ gen8_blend_state.c \ - gen8_blorp.cpp \ + gen8_blorp.c \ gen8_depth_state.c \ gen8_disable.c \ gen8_draw_upload.c \ diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c new file mode 100644 index 00000000000..1955811b002 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen6_blorp.c @@ -0,0 +1,1049 @@ +/* + * Copyright © 2011 Intel Corporation + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+
+#include "brw_blorp.h"
+
+/**
+ * Emit 3DSTATE_VERTEX_BUFFERS describing the single BLORP vertex buffer.
+ *
+ * \param num_elems     number of floats per vertex; determines the pitch
+ * \param vbo_size      size of the vertex data in bytes
+ * \param vertex_offset offset of the vertex data inside brw->batch.bo
+ */
+static void
+gen6_blorp_emit_vertex_buffer_state(struct brw_context *brw,
+                                    unsigned num_elems,
+                                    unsigned vbo_size,
+                                    uint32_t vertex_offset)
+{
+   /* 3DSTATE_VERTEX_BUFFERS */
+   const int num_buffers = 1;
+   const int batch_length = 1 + 4 * num_buffers;
+
+   uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
+                  (num_elems * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
+
+   if (brw->gen >= 7)
+      dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+
+   switch (brw->gen) {
+   case 7:
+      dw0 |= GEN7_MOCS_L3 << 16;
+      break;
+   case 8:
+      dw0 |= BDW_MOCS_WB << 16;
+      break;
+   case 9:
+      dw0 |= SKL_MOCS_WB << 16;
+      break;
+   default:
+      /* No MOCS bits are set for any other generation. */
+      break;
+   }
+
+   BEGIN_BATCH(batch_length);
+   OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+   OUT_BATCH(dw0);
+   if (brw->gen >= 8) {
+      /* 64-bit start address followed by the buffer size. */
+      OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, vertex_offset);
+      OUT_BATCH(vbo_size);
+   } else {
+      /* start address */
+      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset);
+      /* end address */
+      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+                vertex_offset + vbo_size - 1);
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+}
+
+/**
+ * Upload the three RECTLIST vertices built from params->x0/y0/x1/y1 and emit
+ * the matching 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets.
+ */
+void
+gen6_blorp_emit_vertices(struct brw_context *brw,
+                         const struct brw_blorp_params *params)
+{
+   uint32_t vertex_offset;
+
+   /* Setup VBO for the rectangle primitive.
+    *
+    * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
+    * vertices. The vertices reside in screen space with DirectX coordinates
+    * (that is, (0, 0) is the upper left corner).
+    *
+    *   v2 ------ implied
+    *    |        |
+    *    |        |
+    *   v0 ----- v1
+    *
+    * Since the VS is disabled, the clipper loads each VUE directly from
+    * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
+    * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
+    *   dw0: Reserved, MBZ.
+    *   dw1: Render Target Array Index. The HiZ op does not use indexed
+    *        vertices, so set the dword to 0.
+    *   dw2: Viewport Index. The HiZ op disables viewport mapping and
+    *        scissoring, so set the dword to 0.
+    *   dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
+    *        set the dword to 0.
+    *   dw4: Vertex Position X.
+    *   dw5: Vertex Position Y.
+    *   dw6: Vertex Position Z.
+    *   dw7: Vertex Position W.
+    *
+    * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
+    * "Vertex URB Entry (VUE) Formats".
+    *
+    * Only vertex position X and Y are going to be variable, Z is fixed to
+    * zero and W to one. Header words dw0-3 are all zero. There is no need to
+    * include the fixed values in the vertex buffer. Vertex fetcher can be
+    * instructed to fill vertex elements with constant values of one and zero
+    * instead of reading them from the buffer. See the vertex element setup
+    * below.
+    */
+   {
+      float *vertex_data;
+
+      const float vertices[] = {
+         /* v0 */ (float)params->x0, (float)params->y1,
+         /* v1 */ (float)params->x1, (float)params->y1,
+         /* v2 */ (float)params->x0, (float)params->y0,
+      };
+
+      vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
+                                              sizeof(vertices), 32,
+                                              &vertex_offset);
+      memcpy(vertex_data, vertices, sizeof(vertices));
+
+      /* Two floats (X, Y) are stored per vertex. */
+      const unsigned blorp_num_vue_elems = 2;
+      gen6_blorp_emit_vertex_buffer_state(brw, blorp_num_vue_elems,
+                                          sizeof(vertices), vertex_offset);
+   }
+
+   /* 3DSTATE_VERTEX_ELEMENTS
+    *
+    * Fetch dwords 0 - 7 from each VUE. See the comments above where
+    * the vertex_bo is filled with data.
+    */
+   {
+      const int num_elements = 2;
+      const int batch_length = 1 + 2 * num_elements;
+
+      BEGIN_BATCH(batch_length);
+      OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
+      /* Element 0: the VUE header, all four components stored as 0. */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
+      /* Element 1: position; X and Y from the buffer, Z = 0, W = 1.0. */
+      OUT_BATCH(GEN6_VE0_VALID |
+                BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+                0 << BRW_VE0_SRC_OFFSET_SHIFT);
+      OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+                BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+                BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+                BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
+      ADVANCE_BATCH();
+   }
+}
+
+
+/* 3DSTATE_URB
+ *
+ * Assign the entire URB to the VS. Even though the VS is disabled, URB space
+ * is still needed because the clipper loads the VUE's from the URB. From
+ * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
+ * Dword 1.15:0 "VS Number of URB Entries":
+ *     This field is always used (even if VS Function Enable is DISABLED).
+ *
+ * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
+ * safely ignore it because this batch contains only one draw call.
+ *     Because of URB corruption caused by allocating a previous GS unit
+ *     URB entry to the VS unit, software is required to send a “GS NULL
+ *     Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
+ *     plus a dummy DRAW call before any case where VS will be taking over
+ *     GS URB space.
+ */
+static void
+gen6_blorp_emit_urb_config(struct brw_context *brw,
+                           const struct brw_blorp_params *params)
+{
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
+   /* Give the VS the maximum number of URB entries; the GS dword is 0. */
+   OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+
+/* BLEND_STATE
+ *
+ * Allocates one zeroed gen6_blend_state per draw buffer in the batch, enables
+ * pre- and post-blend clamping to the render target format range, and copies
+ * the per-channel write-disable flags from params->color_write_disable.
+ *
+ * Returns the offset that brw_state_batch() reported for the allocation.
+ */
+uint32_t
+gen6_blorp_emit_blend_state(struct brw_context *brw,
+                            const struct brw_blorp_params *params)
+{
+   uint32_t cc_blend_state_offset;
+
+   assume(params->num_draw_buffers);
+
+   const unsigned size = params->num_draw_buffers *
+                         sizeof(struct gen6_blend_state);
+   struct gen6_blend_state *blend = (struct gen6_blend_state *)
+      brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64,
+                      &cc_blend_state_offset);
+
+   memset(blend, 0, size);
+
+   for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
+      blend[i].blend1.pre_blend_clamp_enable = 1;
+      blend[i].blend1.post_blend_clamp_enable = 1;
+      blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
+
+      blend[i].blend1.write_disable_r = params->color_write_disable[0];
+      blend[i].blend1.write_disable_g = params->color_write_disable[1];
+      blend[i].blend1.write_disable_b = params->color_write_disable[2];
+      blend[i].blend1.write_disable_a = params->color_write_disable[3];
+   }
+
+   return cc_blend_state_offset;
+}
+
+
+/* CC_STATE
+ *
+ * Allocates an all-zero gen6_color_calc_state in the batch and returns the
+ * offset that brw_state_batch() reported for it.
+ */
+uint32_t
+gen6_blorp_emit_cc_state(struct brw_context *brw)
+{
+   uint32_t cc_state_offset;
+
+   /* Size the allocation from the pointee: a bare struct tag is not a type
+    * name in C, and this keeps the size in step with the memset() below.
+    */
+   struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *)
+      brw_state_batch(brw, AUB_TRACE_CC_STATE,
+                      sizeof(*cc), 64,
+                      &cc_state_offset);
+   memset(cc, 0, sizeof(*cc));
+
+   return cc_state_offset;
+}
+
+
+/**
+ * \param out_offset is relative to
+ *      CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ *
+ * \return the offset reported by brw_state_batch() for the zero-initialised
+ *         DEPTH_STENCIL_STATE allocated here.
+ */
+uint32_t
+gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
+                                    const struct brw_blorp_params *params)
+{
+   uint32_t depthstencil_offset;
+
+   struct gen6_depth_stencil_state *state;
+   state = (struct gen6_depth_stencil_state *)
+      brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
+                      sizeof(*state), 64,
+                      &depthstencil_offset);
+   memset(state, 0, sizeof(*state));
+
+   /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
+    *   - 7.5.3.1 Depth Buffer Clear
+    *   - 7.5.3.2 Depth Buffer Resolve
+    *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
+    */
+   state->ds2.depth_write_enable = 1;
+   if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
+      /* Only the depth resolve additionally enables the depth test, with a
+       * compare function of NEVER.
+       */
+      state->ds2.depth_test_enable = 1;
+      state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER;
+   }
+
+   return depthstencil_offset;
+}
+
+
+/* 3DSTATE_CC_STATE_POINTERS
+ *
+ * The pointer offsets are relative to
+ * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ *
+ * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
+ *
+ * Bit 0 is set in each of the three offset dwords.
+ */
+static void
+gen6_blorp_emit_cc_state_pointers(struct brw_context *brw,
+                                  const struct brw_blorp_params *params,
+                                  uint32_t cc_blend_state_offset,
+                                  uint32_t depthstencil_offset,
+                                  uint32_t cc_state_offset)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
+   OUT_BATCH(cc_blend_state_offset | 1); /* BLEND_STATE offset */
+   OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
+   OUT_BATCH(cc_state_offset | 1); /* COLOR_CALC_STATE offset */
+   ADVANCE_BATCH();
+}
+
+
+/* WM push constants
+ *
+ * Copies params->wm_push_consts into a fresh 32-byte-aligned allocation in
+ * the batch and returns the offset reported by brw_state_batch().
+ */
+uint32_t
+gen6_blorp_emit_wm_constants(struct brw_context *brw,
+                             const struct brw_blorp_params *params)
+{
+   uint32_t wm_push_const_offset;
+
+   void *constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS,
+                                     sizeof(params->wm_push_consts),
+                                     32, &wm_push_const_offset);
+   memcpy(constants, &params->wm_push_consts,
+          sizeof(params->wm_push_consts));
+
+   return wm_push_const_offset;
+}
+
+
+/* SURFACE_STATE for renderbuffer or texture surface (see
+ * 
brw_update_renderbuffer_surface and brw_update_texture_surface)
+ *
+ * Allocates a six-dword SURFACE_STATE in the batch, fills it from \p surface
+ * and emits a relocation for the surface address held in dword 1.
+ *
+ * Returns the offset reported by brw_state_batch() for the allocation.
+ */
+static uint32_t
+gen6_blorp_emit_surface_state(struct brw_context *brw,
+                              const struct brw_blorp_params *params,
+                              const struct brw_blorp_surface_info *surface,
+                              uint32_t read_domains, uint32_t write_domain)
+{
+   uint32_t wm_surf_offset;
+   uint32_t width = surface->width;
+   uint32_t height = surface->height;
+   if (surface->num_samples > 1) {
+      /* Since gen6 uses INTEL_MSAA_LAYOUT_IMS, width and height are measured
+       * in samples.  But SURFACE_STATE wants them in pixels, so we need to
+       * divide them each by 2.
+       */
+      width /= 2;
+      height /= 2;
+   }
+   struct intel_mipmap_tree *mt = surface->mt;
+   uint32_t tile_x, tile_y;
+
+   uint32_t *surf = (uint32_t *)
+      brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
+                      &wm_surf_offset);
+
+   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
+              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
+              BRW_SURFACE_CUBEFACE_ENABLES |
+              surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT);
+
+   /* reloc: dword 1 is the tile-aligned offset plus the BO's current
+    * offset64; the matching relocation for this dword is emitted at the end
+    * of the function.
+    */
+   surf[1] = (brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) +
+              mt->bo->offset64);
+
+   surf[2] = (0 << BRW_SURFACE_LOD_SHIFT |
+              (width - 1) << BRW_SURFACE_WIDTH_SHIFT |
+              (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
+
+   uint32_t tiling = surface->map_stencil_as_y_tiled
+      ? BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y
+      : brw_get_surface_tiling_bits(mt->tiling);
+   uint32_t pitch_bytes = mt->pitch;
+   if (surface->map_stencil_as_y_tiled)
+      pitch_bytes *= 2;
+   surf[3] = (tiling |
+              0 << BRW_SURFACE_DEPTH_SHIFT |
+              (pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT);
+
+   surf[4] = brw_get_surface_num_multisamples(surface->num_samples);
+
+   /* Note that the low bits of these fields are missing, so
+    * there's the possibility of getting in trouble.  The asserts below check
+    * that the intra-tile offsets survive the divisions by 4 and 2.
+    */
+   assert(tile_x % 4 == 0);
+   assert(tile_y % 2 == 0);
+   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
+              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
+              (surface->mt->valign == 4 ?
+               BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
+
+   /* Emit relocation to surface contents: it targets dword 1 of the state
+    * (wm_surf_offset + 4) with the delta that was added to offset64 above.
+    */
+   drm_intel_bo_emit_reloc(brw->batch.bo,
+                           wm_surf_offset + 4,
+                           mt->bo,
+                           surf[1] - mt->bo->offset64,
+                           read_domains, write_domain);
+
+   return wm_surf_offset;
+}
+
+
+/* BINDING_TABLE.  See brw_wm_binding_table().
+ *
+ * Allocates BRW_BLORP_NUM_BINDING_TABLE_ENTRIES dwords in the batch, stores
+ * the renderbuffer and texture SURFACE_STATE offsets in their respective
+ * slots and returns the offset reported by brw_state_batch() for the table.
+ */
+uint32_t
+gen6_blorp_emit_binding_table(struct brw_context *brw,
+                              uint32_t wm_surf_offset_renderbuffer,
+                              uint32_t wm_surf_offset_texture)
+{
+   uint32_t wm_bind_bo_offset;
+   uint32_t *bind = (uint32_t *)
+      brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+                      sizeof(uint32_t) *
+                      BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
+                      32, /* alignment */
+                      &wm_bind_bo_offset);
+   bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
+      wm_surf_offset_renderbuffer;
+   bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture;
+
+   return wm_bind_bo_offset;
+}
+
+
+/**
+ * SAMPLER_STATE.  See brw_update_sampler_state().
+ *
+ * Allocates 16 bytes of sampler state in the batch and fills it through
+ * brw_emit_sampler_state(): \p tex_filter is used for both the min and mag
+ * filter, mip filtering is BRW_MIPFILTER_NONE, all three texture coordinates
+ * use BRW_TEXCOORDMODE_CLAMP and the LOD range is [0, \p max_lod].
+ *
+ * Returns the offset reported by brw_state_batch() for the allocation.
+ */
+uint32_t
+gen6_blorp_emit_sampler_state(struct brw_context *brw,
+                              unsigned tex_filter, unsigned max_lod,
+                              bool non_normalized_coords)
+{
+   uint32_t sampler_offset;
+   uint32_t *sampler_state = (uint32_t *)
+      brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset);
+
+   unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+                               BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+                               BRW_ADDRESS_ROUNDING_ENABLE_R_MIN |
+                               BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+                               BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+                               BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
+
+   /* XXX: I don't think that using firstLevel, lastLevel works,
+    * because we always setup the surface state as if firstLevel ==
+    * level zero.  Probably have to subtract firstLevel from each of
+    * these:
+    */
+   brw_emit_sampler_state(brw,
+                          sampler_state,
+                          sampler_offset,
+                          tex_filter, /* min filter */
+                          tex_filter, /* mag filter */
+                          BRW_MIPFILTER_NONE,
+                          BRW_ANISORATIO_2,
+                          address_rounding,
+                          BRW_TEXCOORDMODE_CLAMP,
+                          BRW_TEXCOORDMODE_CLAMP,
+                          BRW_TEXCOORDMODE_CLAMP,
+                          0, /* min LOD */
+                          max_lod,
+                          0, /* LOD bias */
+                          0, /* shadow function */
+                          non_normalized_coords,
+                          0); /* border color offset - unused */
+
+   return sampler_offset;
+}
+
+
+/**
+ * 3DSTATE_SAMPLER_STATE_POINTERS.  See upload_sampler_state_pointers().
+ *
+ * All three *_SAMPLER_STATE_CHANGE bits are set, but only the last dword
+ * carries a non-zero pointer (\p sampler_offset); the VS and GS dwords are
+ * written as 0.
+ */
+static void
+gen6_blorp_emit_sampler_state_pointers(struct brw_context *brw,
+                                       uint32_t sampler_offset)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
+             VS_SAMPLER_STATE_CHANGE |
+             GS_SAMPLER_STATE_CHANGE |
+             PS_SAMPLER_STATE_CHANGE |
+             (4 - 2));
+   OUT_BATCH(0); /* VS */
+   OUT_BATCH(0); /* GS */
+   OUT_BATCH(sampler_offset);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_VS
+ *
+ * Disable vertex shader.
+ *
+ * Emits 3DSTATE_CONSTANT_VS followed by 3DSTATE_VS, each with every payload
+ * dword set to 0.
+ */
+void
+gen6_blorp_emit_vs_disable(struct brw_context *brw,
+                           const struct brw_blorp_params *params)
+{
+   /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
+    * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
+    *
+    *   [DevSNB] A pipeline flush must be programmed prior to a
+    *   3DSTATE_VS command that causes the VS Function Enable to
+    *   toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
+    *   command with CS stall bit set and a post sync operation.
+    *
+    * We've already done one at the start of the BLORP operation.
+    */
+
+   /* Disable the push constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(6);
+   OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_GS
+ *
+ * Disable the geometry shader.
+ */
+void
+gen6_blorp_emit_gs_disable(struct brw_context *brw,
+                           const struct brw_blorp_params *params)
+{
+   /* Disable all the constant buffers: 3DSTATE_CONSTANT_GS is sent with no
+    * buffer-enable bits in the header and all four payload dwords zero.
+    */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   brw->gs.enabled = false; /* record in the context that the GS is off */
+}
+
+
+/* 3DSTATE_CLIP
+ *
+ * Disable the clipper.
+ *
+ * The BLORP op emits a rectangle primitive, which requires clipping to
+ * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+ * Section 1.3 "3D Primitives Overview":
+ *    RECTLIST:
+ *    Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+ *    Mode should be set to a value other than CLIPMODE_NORMAL.
+ *
+ * Also disable perspective divide. This doesn't change the clipper's
+ * output, but does spare a few electrons.
+ *
+ * The packet therefore carries GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE in its
+ * second payload dword and 0 in the other two.
+ */
+void
+gen6_blorp_emit_clip_disable(struct brw_context *brw)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_SF
+ *
+ * Disable ViewportTransformEnable (dw2.1)
+ *
+ * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
+ * Primitives Overview":
+ *     RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
+ *     use of screen-space coordinates).
+ *
+ * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
+ * and BackFaceFillMode (dw2.6:5) to SOLID(0).
+ *
+ * From the Sandy Bridge PRM, Volume 2, Part 1, Section
+ * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
+ *     SOLID: Any triangle or rectangle object found to be front-facing
+ *     is rendered as a solid object. This setting is required when
+ *     (rendering rectangle (RECTLIST) objects.
+ */
+static void
+gen6_blorp_emit_sf_config(struct brw_context *brw,
+                          const struct brw_blorp_params *params)
+{
+   BEGIN_BATCH(20);
+   OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
+   OUT_BATCH(params->num_varyings << GEN6_SF_NUM_OUTPUTS_SHIFT |
+             1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
+             BRW_SF_URB_ENTRY_READ_OFFSET <<
+                GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
+   OUT_BATCH(0); /* dw2 */
+   OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
+   /* The remaining 16 dwords of the packet are all zero. */
+   for (int i = 0; i < 16; ++i) {
+      OUT_BATCH(0);
+   }
+   ADVANCE_BATCH();
+}
+
+
+/**
+ * Enable or disable thread dispatch and set the HiZ op appropriately.
+ */
+static void
+gen6_blorp_emit_wm_config(struct brw_context *brw,
+                          const struct brw_blorp_params *params)
+{
+   const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
+   uint32_t dw2, dw4, dw5, dw6;
+
+   /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
+    * nonzero to prevent the GPU from hanging.  While the documentation doesn't
+    * mention this explicitly, it notes that the valid range for the field is
+    * [1,39] = [2,40] threads, which excludes zero.
+    *
+    * To be safe (and to minimize extraneous code) we go ahead and fully
+    * configure the WM state whether or not there is a WM program.
+    */
+
+   dw2 = dw4 = dw5 = dw6 = 0;
+   switch (params->hiz_op) {
+   case GEN6_HIZ_OP_DEPTH_CLEAR:
+      dw4 |= GEN6_WM_DEPTH_CLEAR;
+      break;
+   case GEN6_HIZ_OP_DEPTH_RESOLVE:
+      dw4 |= GEN6_WM_DEPTH_RESOLVE;
+      break;
+   case GEN6_HIZ_OP_HIZ_RESOLVE:
+      dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
+      break;
+   case GEN6_HIZ_OP_NONE:
+      break;
+   default:
+      unreachable("not reached");
+   }
+   dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
+   dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
+   dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
+   dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
+   dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */
+   if (params->wm_prog_data) {
+      dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
+      dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
+      dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
+   }
+
+   if (params->src.mt) {
+      dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
+      dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
+   }
+
+   if (params->dst.num_samples > 1) {
+      dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
+      if (prog_data && prog_data->persample_msaa_dispatch)
+         dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
+      else
+         dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
+   } else {
+      dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
+      dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
+   }
+
+   BEGIN_BATCH(9);
+   OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
+   OUT_BATCH(params->wm_prog_kernel);
+   OUT_BATCH(dw2);
+   OUT_BATCH(0); /* No scratch needed */
+   OUT_BATCH(dw4);
+   OUT_BATCH(dw5);
+   OUT_BATCH(dw6);
+   OUT_BATCH(0); /* No other programs */
+   OUT_BATCH(0); /* No other programs */
+   ADVANCE_BATCH();
+}
+
+
+/**
+ * Emit 3DSTATE_CONSTANT_PS with push constant buffer 0 enabled, pointing at
+ * \p wm_push_const_offset.
+ */
+static void
+gen6_blorp_emit_constant_ps(struct brw_context *brw,
+                            const struct brw_blorp_params *params,
+                            uint32_t wm_push_const_offset)
+{
+   /* Make sure the push constants fill an exact integer number of
+    * registers.
+    */
+   assert(sizeof(struct brw_blorp_wm_push_constants) % 32 == 0);
+
+   /* There must be at least one register worth of push constant data. */
+   assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0);
+
+   /* Enable push constant buffer 0. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 |
+             GEN6_CONSTANT_BUFFER_0_ENABLE |
+             (5 - 2));
+   OUT_BATCH(wm_push_const_offset + (BRW_BLORP_NUM_PUSH_CONST_REGS - 1));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/**
+ * Emit 3DSTATE_CONSTANT_PS with no buffer enabled and all payload dwords 0.
+ */
+static void
+gen6_blorp_emit_constant_ps_disable(struct brw_context *brw,
+                                    const struct brw_blorp_params *params)
+{
+   /* Disable the push constant buffers. */
+   BEGIN_BATCH(5);
+   OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/**
+ * 3DSTATE_BINDING_TABLE_POINTERS
+ *
+ * Only the PS binding table is modified (GEN6_BINDING_TABLE_MODIFY_PS).
+ */
+static void
+gen6_blorp_emit_binding_table_pointers(struct brw_context *brw,
+                                       uint32_t wm_bind_bo_offset)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
+             GEN6_BINDING_TABLE_MODIFY_PS |
+             (4 - 2));
+   OUT_BATCH(0); /* vs -- ignored */
+   OUT_BATCH(0); /* gs -- ignored */
+   OUT_BATCH(wm_bind_bo_offset); /* wm/ps */
+   ADVANCE_BATCH();
+}
+
+
+/**
+ * Emit 3DSTATE_DEPTH_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER and a zeroed
+ * 3DSTATE_STENCIL_BUFFER for params->depth.
+ *
+ * Dereferences params->depth.mt and its hiz_buf unconditionally; the caller
+ * in this file only invokes it when params->depth.mt is non-NULL.
+ */
+static void
+gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
+                                     const struct brw_blorp_params *params)
+{
+   uint32_t surfwidth, surfheight;
+   uint32_t surftype;
+   unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
+   GLenum gl_target = params->depth.mt->target;
+   unsigned int lod;
+
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+      /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+       * situation, but experiments show that gl_Layer doesn't work when we do
+       * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+       * equivalent.
+       */
+      surftype = BRW_SURFACE_2D;
+      depth *= 6;
+      break;
+   default:
+      surftype = translate_tex_target(gl_target);
+      break;
+   }
+
+   const unsigned min_array_element = params->depth.layer;
+
+   lod = params->depth.level - params->depth.mt->first_level;
+
+   if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) {
+      /* HIZ ops for lod 0 may set the width & height a little
+       * larger to allow the fast depth clear to fit the hardware
+       * alignment requirements. (8x4)
+       */
+      surfwidth = params->depth.width;
+      surfheight = params->depth.height;
+   } else {
+      surfwidth = params->depth.mt->logical_width0;
+      surfheight = params->depth.mt->logical_height0;
+   }
+
+   /* 3DSTATE_DEPTH_BUFFER */
+   {
+      brw_emit_depth_stall_flushes(brw);
+
+      BEGIN_BATCH(7);
+      /* 3DSTATE_DEPTH_BUFFER dw0 */
+      OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+
+      /* 3DSTATE_DEPTH_BUFFER dw1 */
+      OUT_BATCH((params->depth.mt->pitch - 1) |
+                params->depth_format << 18 |
+                1 << 21 | /* separate stencil enable */
+                1 << 22 | /* hiz enable */
+                BRW_TILEWALK_YMAJOR << 26 |
+                1 << 27 | /* y-tiled */
+                surftype << 29);
+
+      /* 3DSTATE_DEPTH_BUFFER dw2 */
+      OUT_RELOC(params->depth.mt->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                0);
+
+      /* 3DSTATE_DEPTH_BUFFER dw3 */
+      OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
+                (surfwidth - 1) << 6 |
+                (surfheight - 1) << 19 |
+                lod << 2);
+
+      /* 3DSTATE_DEPTH_BUFFER dw4 */
+      OUT_BATCH((depth - 1) << 21 |
+                min_array_element << 10 |
+                (depth - 1) << 1);
+
+      /* 3DSTATE_DEPTH_BUFFER dw5 */
+      OUT_BATCH(0);
+
+      /* 3DSTATE_DEPTH_BUFFER dw6 */
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_HIER_DEPTH_BUFFER */
+   {
+      struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_buf->mt;
+      uint32_t offset = 0;
+
+      if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
+         offset = intel_miptree_get_aligned_offset(hiz_mt,
+                                                   hiz_mt->level[lod].level_x,
+                                                   hiz_mt->level[lod].level_y,
+                                                   false);
+      }
+
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(hiz_mt->pitch - 1);
+      OUT_RELOC(hiz_mt->bo,
+                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
+                offset);
+      ADVANCE_BATCH();
+   }
+
+   /* 3DSTATE_STENCIL_BUFFER */
+   {
+      BEGIN_BATCH(3);
+      OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
+      OUT_BATCH(0);
+      OUT_BATCH(0);
+      ADVANCE_BATCH();
+   }
+}
+
+
+/**
+ * Emit a BRW_SURFACE_NULL 3DSTATE_DEPTH_BUFFER together with zeroed
+ * 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER packets.
+ */
+static void
+gen6_blorp_emit_depth_disable(struct brw_context *brw,
+                              const struct brw_blorp_params *params)
+{
+   brw_emit_depth_stall_flushes(brw);
+
+   BEGIN_BATCH(7);
+   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+   OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
+             (BRW_SURFACE_NULL << 29));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_CLEAR_PARAMS
+ *
+ * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
+ *   [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
+ *   packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
+ *
+ * The clear value is taken from the depth miptree when there is one and is 0
+ * otherwise.
+ */
+static void
+gen6_blorp_emit_clear_params(struct brw_context *brw,
+                             const struct brw_blorp_params *params)
+{
+   BEGIN_BATCH(2);
+   OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
+             GEN5_DEPTH_CLEAR_VALID |
+             (2 - 2));
+   OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_DRAWING_RECTANGLE
+ *
+ * The second payload dword packs the larger of x0/x1 and of y0/y1, each
+ * minus one, into the low and high 16 bits; the other two dwords are 0.
+ */
+void
+gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
+                                  const struct brw_blorp_params *params)
+{
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+   OUT_BATCH(0);
+   OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) |
+             ((MAX2(params->y1, params->y0) - 1) << 16));
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/* 3DSTATE_VIEWPORT_STATE_POINTERS
+ *
+ * Allocates a CC viewport with depth range [0.0, 1.0] in the batch and points
+ * the CC viewport at it; the clip and SF viewport dwords are written as 0.
+ */
+static void
+gen6_blorp_emit_viewport_state(struct brw_context *brw,
+                               const struct brw_blorp_params *params)
+{
+   struct brw_cc_viewport *ccv;
+   uint32_t cc_vp_offset;
+
+   ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
+                                                   sizeof(*ccv), 32,
+                                                   &cc_vp_offset);
+
+   ccv->min_depth = 0.0;
+   ccv->max_depth = 1.0;
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
+             GEN6_CC_VIEWPORT_MODIFY);
+   OUT_BATCH(0); /* clip VP */
+   OUT_BATCH(0); /* SF VP */
+   OUT_BATCH(cc_vp_offset);
+   ADVANCE_BATCH();
+}
+
+
+/* 3DPRIMITIVE
+ *
+ * Draws one sequential RECTLIST of three vertices, instanced
+ * params->num_layers times.
+ */
+static void
+gen6_blorp_emit_primitive(struct brw_context *brw,
+                          const struct brw_blorp_params *params)
+{
+   BEGIN_BATCH(6);
+   OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
+             _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
+             GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
+   OUT_BATCH(3); /* vertex count per instance */
+   OUT_BATCH(0);
+   OUT_BATCH(params->num_layers); /* instance count */
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+}
+
+/**
+ * \brief Execute a blit or render pass operation.
+ *
+ * To execute the operation, this function manually constructs and emits a
+ * batch to draw a rectangle primitive. The batchbuffer is flushed before
+ * constructing and after emitting the batch.
+ *
+ * This function alters no GL state.
+ */
+void
+gen6_blorp_exec(struct brw_context *brw,
+                const struct brw_blorp_params *params)
+{
+   /* Offsets that stay 0 when there is no WM program to run. */
+   uint32_t cc_blend_state_offset = 0;
+   uint32_t cc_state_offset = 0;
+   uint32_t depthstencil_offset;
+   uint32_t wm_push_const_offset = 0;
+   uint32_t wm_bind_bo_offset = 0;
+
+   /* Emit workaround flushes when we switch from drawing to blorping. */
+   brw_emit_post_sync_nonzero_flush(brw);
+
+   if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState)
+      brw_state_base_address.emit(brw);
+
+   gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
+   gen6_emit_3dstate_sample_mask(brw,
+                                 params->dst.num_samples > 1 ?
+                                 (1 << params->dst.num_samples) - 1 : 1);
+   gen6_blorp_emit_vertices(brw, params);
+   gen6_blorp_emit_urb_config(brw, params);
+   if (params->wm_prog_data) {
+      cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
+      cc_state_offset = gen6_blorp_emit_cc_state(brw);
+   }
+   depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
+   gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset,
+                                     depthstencil_offset, cc_state_offset);
+   if (params->wm_prog_data) {
+      uint32_t wm_surf_offset_renderbuffer;
+      uint32_t wm_surf_offset_texture = 0;
+      wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params);
+      intel_miptree_used_for_rendering(params->dst.mt);
+      wm_surf_offset_renderbuffer =
+         gen6_blorp_emit_surface_state(brw, params, &params->dst,
+                                       I915_GEM_DOMAIN_RENDER,
+                                       I915_GEM_DOMAIN_RENDER);
+      if (params->src.mt) {
+         wm_surf_offset_texture =
+            gen6_blorp_emit_surface_state(brw, params, &params->src,
+                                          I915_GEM_DOMAIN_SAMPLER, 0);
+      }
+      wm_bind_bo_offset =
+         gen6_blorp_emit_binding_table(brw,
+                                       wm_surf_offset_renderbuffer,
+                                       wm_surf_offset_texture);
+   }
+
+   if (params->src.mt) {
+      const uint32_t sampler_offset =
+         gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
+      gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset);
+   }
+   gen6_blorp_emit_vs_disable(brw, params);
+   gen6_blorp_emit_gs_disable(brw, params);
+   gen6_blorp_emit_clip_disable(brw);
+   gen6_blorp_emit_sf_config(brw, params);
+   if (params->wm_prog_data)
+      gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset);
+   else
+      gen6_blorp_emit_constant_ps_disable(brw, params);
+   gen6_blorp_emit_wm_config(brw, params);
+   if (params->wm_prog_data)
+      gen6_blorp_emit_binding_table_pointers(brw, wm_bind_bo_offset);
+   gen6_blorp_emit_viewport_state(brw, params);
+
+   if (params->depth.mt)
+      gen6_blorp_emit_depth_stencil_config(brw, params);
+   else
+      gen6_blorp_emit_depth_disable(brw, params);
+   gen6_blorp_emit_clear_params(brw, params);
+   gen6_blorp_emit_drawing_rectangle(brw, params);
+   gen6_blorp_emit_primitive(brw, params);
+}
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
deleted file mode 100644
index ee3276ee460..00000000000
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_blorp.h" - -static void -gen6_blorp_emit_vertex_buffer_state(struct brw_context *brw, - unsigned num_elems, - unsigned vbo_size, - uint32_t vertex_offset) -{ - /* 3DSTATE_VERTEX_BUFFERS */ - const int num_buffers = 1; - const int batch_length = 1 + 4 * num_buffers; - - uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA | - (num_elems * sizeof(float)) << BRW_VB0_PITCH_SHIFT; - - if (brw->gen >= 7) - dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE; - - switch (brw->gen) { - case 7: - dw0 |= GEN7_MOCS_L3 << 16; - break; - case 8: - dw0 |= BDW_MOCS_WB << 16; - break; - case 9: - dw0 |= SKL_MOCS_WB << 16; - break; - } - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); - OUT_BATCH(dw0); - if (brw->gen >= 8) { - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, vertex_offset); - OUT_BATCH(vbo_size); - } else { - /* start address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset); - /* end address */ - OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0, - vertex_offset + vbo_size - 1); - OUT_BATCH(0); - } - ADVANCE_BATCH(); -} - -void -gen6_blorp_emit_vertices(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t vertex_offset; - - /* Setup VBO for the rectangle primitive.. - * - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three - * vertices. The vertices reside in screen space with DirectX coordinates - * (that is, (0, 0) is the upper left corner). 
- * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 - * - * Since the VS is disabled, the clipper loads each VUE directly from - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: - * dw0: Reserved, MBZ. - * dw1: Render Target Array Index. The HiZ op does not use indexed - * vertices, so set the dword to 0. - * dw2: Viewport Index. The HiZ op disables viewport mapping and - * scissoring, so set the dword to 0. - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so - * set the dword to 0. - * dw4: Vertex Position X. - * dw5: Vertex Position Y. - * dw6: Vertex Position Z. - * dw7: Vertex Position W. - * - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 - * "Vertex URB Entry (VUE) Formats". - * - * Only vertex position X and Y are going to be variable, Z is fixed to - * zero and W to one. Header words dw0-3 are all zero. There is no need to - * include the fixed values in the vertex buffer. Vertex fetcher can be - * instructed to fill vertex elements with constant values of one and zero - * instead of reading them from the buffer. See the vertex element setup - * below. - */ - { - float *vertex_data; - - const float vertices[] = { - /* v0 */ (float)params->x0, (float)params->y1, - /* v1 */ (float)params->x1, (float)params->y1, - /* v2 */ (float)params->x0, (float)params->y0, - }; - - vertex_data = (float *) brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, - sizeof(vertices), 32, - &vertex_offset); - memcpy(vertex_data, vertices, sizeof(vertices)); - - const unsigned blorp_num_vue_elems = 2; - gen6_blorp_emit_vertex_buffer_state(brw, blorp_num_vue_elems, - sizeof(vertices), vertex_offset); - } - - /* 3DSTATE_VERTEX_ELEMENTS - * - * Fetch dwords 0 - 7 from each VUE. See the comments above where - * the vertex_bo is filled with data. 
- */ - { - const int num_elements = 2; - const int batch_length = 1 + 2 * num_elements; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); - /* Element 0 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); - /* Element 1 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); - ADVANCE_BATCH(); - } -} - - -/* 3DSTATE_URB - * - * Assign the entire URB to the VS. Even though the VS disabled, URB space - * is still needed because the clipper loads the VUE's from the URB. From - * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE, - * Dword 1.15:0 "VS Number of URB Entries": - * This field is always used (even if VS Function Enable is DISABLED). - * - * The warning below appears in the PRM (Section 3DSTATE_URB), but we can - * safely ignore it because this batch contains only one draw call. - * Because of URB corruption caused by allocating a previous GS unit - * URB entry to the VS unit, software is required to send a “GS NULL - * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0) - * plus a dummy DRAW call before any case where VS will be taking over - * GS URB space. 
- */ -static void -gen6_blorp_emit_urb_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2)); - OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* BLEND_STATE */ -uint32_t -gen6_blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t cc_blend_state_offset; - - assume(params->num_draw_buffers); - - const unsigned size = params->num_draw_buffers * - sizeof(struct gen6_blend_state); - struct gen6_blend_state *blend = (struct gen6_blend_state *) - brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, - &cc_blend_state_offset); - - memset(blend, 0, size); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - blend[i].blend1.pre_blend_clamp_enable = 1; - blend[i].blend1.post_blend_clamp_enable = 1; - blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; - - blend[i].blend1.write_disable_r = params->color_write_disable[0]; - blend[i].blend1.write_disable_g = params->color_write_disable[1]; - blend[i].blend1.write_disable_b = params->color_write_disable[2]; - blend[i].blend1.write_disable_a = params->color_write_disable[3]; - } - - return cc_blend_state_offset; -} - - -/* CC_STATE */ -uint32_t -gen6_blorp_emit_cc_state(struct brw_context *brw) -{ - uint32_t cc_state_offset; - - struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *) - brw_state_batch(brw, AUB_TRACE_CC_STATE, - sizeof(gen6_color_calc_state), 64, - &cc_state_offset); - memset(cc, 0, sizeof(*cc)); - - return cc_state_offset; -} - - -/** - * \param out_offset is relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. 
- */ -uint32_t -gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t depthstencil_offset; - - struct gen6_depth_stencil_state *state; - state = (struct gen6_depth_stencil_state *) - brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - sizeof(*state), 64, - &depthstencil_offset); - memset(state, 0, sizeof(*state)); - - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - state->ds2.depth_write_enable = 1; - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - state->ds2.depth_test_enable = 1; - state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER; - } - - return depthstencil_offset; -} - - -/* 3DSTATE_CC_STATE_POINTERS - * - * The pointer offsets are relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - * - * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE. - */ -static void -gen6_blorp_emit_cc_state_pointers(struct brw_context *brw, - const struct brw_blorp_params *params, - uint32_t cc_blend_state_offset, - uint32_t depthstencil_offset, - uint32_t cc_state_offset) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2)); - OUT_BATCH(cc_blend_state_offset | 1); /* BLEND_STATE offset */ - OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */ - OUT_BATCH(cc_state_offset | 1); /* COLOR_CALC_STATE offset */ - ADVANCE_BATCH(); -} - - -/* WM push constants */ -uint32_t -gen6_blorp_emit_wm_constants(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t wm_push_const_offset; - - void *constants = brw_state_batch(brw, AUB_TRACE_WM_CONSTANTS, - sizeof(params->wm_push_consts), - 32, &wm_push_const_offset); - memcpy(constants, &params->wm_push_consts, - sizeof(params->wm_push_consts)); - - return wm_push_const_offset; -} - - -/* SURFACE_STATE for renderbuffer or texture surface (see - *
brw_update_renderbuffer_surface and brw_update_texture_surface) - */ -static uint32_t -gen6_blorp_emit_surface_state(struct brw_context *brw, - const struct brw_blorp_params *params, - const struct brw_blorp_surface_info *surface, - uint32_t read_domains, uint32_t write_domain) -{ - uint32_t wm_surf_offset; - uint32_t width = surface->width; - uint32_t height = surface->height; - if (surface->num_samples > 1) { - /* Since gen6 uses INTEL_MSAA_LAYOUT_IMS, width and height are measured - * in samples. But SURFACE_STATE wants them in pixels, so we need to - * divide them each by 2. - */ - width /= 2; - height /= 2; - } - struct intel_mipmap_tree *mt = surface->mt; - uint32_t tile_x, tile_y; - - uint32_t *surf = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, - &wm_surf_offset); - - surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT | - BRW_SURFACE_CUBEFACE_ENABLES | - surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT); - - /* reloc */ - surf[1] = (brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) + - mt->bo->offset64); - - surf[2] = (0 << BRW_SURFACE_LOD_SHIFT | - (width - 1) << BRW_SURFACE_WIDTH_SHIFT | - (height - 1) << BRW_SURFACE_HEIGHT_SHIFT); - - uint32_t tiling = surface->map_stencil_as_y_tiled - ? BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y - : brw_get_surface_tiling_bits(mt->tiling); - uint32_t pitch_bytes = mt->pitch; - if (surface->map_stencil_as_y_tiled) - pitch_bytes *= 2; - surf[3] = (tiling | - 0 << BRW_SURFACE_DEPTH_SHIFT | - (pitch_bytes - 1) << BRW_SURFACE_PITCH_SHIFT); - - surf[4] = brw_get_surface_num_multisamples(surface->num_samples); - - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. - */ - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | - (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (surface->mt->valign == 4 ? 
- BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); - - /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(brw->batch.bo, - wm_surf_offset + 4, - mt->bo, - surf[1] - mt->bo->offset64, - read_domains, write_domain); - - return wm_surf_offset; -} - - -/* BINDING_TABLE. See brw_wm_binding_table(). */ -uint32_t -gen6_blorp_emit_binding_table(struct brw_context *brw, - uint32_t wm_surf_offset_renderbuffer, - uint32_t wm_surf_offset_texture) -{ - uint32_t wm_bind_bo_offset; - uint32_t *bind = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * - BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, - 32, /* alignment */ - &wm_bind_bo_offset); - bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = - wm_surf_offset_renderbuffer; - bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture; - - return wm_bind_bo_offset; -} - - -/** - * SAMPLER_STATE. See brw_update_sampler_state(). - */ -uint32_t -gen6_blorp_emit_sampler_state(struct brw_context *brw, - unsigned tex_filter, unsigned max_lod, - bool non_normalized_coords) -{ - uint32_t sampler_offset; - uint32_t *sampler_state = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset); - - unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_R_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; - - /* XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. 
Probably have to subtract firstLevel from each of - * these: - */ - brw_emit_sampler_state(brw, - sampler_state, - sampler_offset, - tex_filter, /* min filter */ - tex_filter, /* mag filter */ - BRW_MIPFILTER_NONE, - BRW_ANISORATIO_2, - address_rounding, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - 0, /* min LOD */ - max_lod, - 0, /* LOD bias */ - 0, /* shadow function */ - non_normalized_coords, - 0); /* border color offset - unused */ - - return sampler_offset; -} - - -/** - * 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers(). - */ -static void -gen6_blorp_emit_sampler_state_pointers(struct brw_context *brw, - uint32_t sampler_offset) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 | - VS_SAMPLER_STATE_CHANGE | - GS_SAMPLER_STATE_CHANGE | - PS_SAMPLER_STATE_CHANGE | - (4 - 2)); - OUT_BATCH(0); /* VS */ - OUT_BATCH(0); /* GS */ - OUT_BATCH(sampler_offset); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_VS - * - * Disable vertex shader. - */ -void -gen6_blorp_emit_vs_disable(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, - * 3DSTATE_VS, Dword 5.0 "VS Function Enable": - * - * [DevSNB] A pipeline flush must be programmed prior to a - * 3DSTATE_VS command that causes the VS Function Enable to - * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL - * command with CS stall bit set and a post sync operation. - * - * We've already done one at the start of the BLORP operation. - */ - - /* Disable the push constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_GS - * - * Disable the geometry shader. 
- */ -void -gen6_blorp_emit_gs_disable(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* Disable all the constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - brw->gs.enabled = false; -} - - -/* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The BLORP op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ -void -gen6_blorp_emit_clip_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw2.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3) - * and BackFaceFillMode (dw2.5:6) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. 
- */ -static void -gen6_blorp_emit_sf_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(20); - OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2)); - OUT_BATCH(params->num_varyings << GEN6_SF_NUM_OUTPUTS_SHIFT | - 1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << - GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT); - OUT_BATCH(0); /* dw2 */ - OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0); - for (int i = 0; i < 16; ++i) - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/** - * Enable or disable thread dispatch and set the HiZ op appropriately. - */ -static void -gen6_blorp_emit_wm_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw2, dw4, dw5, dw6; - - /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be - * nonzero to prevent the GPU from hanging. While the documentation doesn't - * mention this explicitly, it notes that the valid range for the field is - * [1,39] = [2,40] threads, which excludes zero. - * - * To be safe (and to minimize extraneous code) we go ahead and fully - * configure the WM state whether or not there is a WM program. 
- */ - - dw2 = dw4 = dw5 = dw6 = 0; - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - dw4 |= GEN6_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw4 |= GEN6_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; - dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; - dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; - dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ - dw6 |= 0 << GEN6_WM_NUM_SF_OUTPUTS_SHIFT; /* No inputs from SF */ - if (params->wm_prog_data) { - dw4 |= prog_data->first_curbe_grf << GEN6_WM_DISPATCH_START_GRF_SHIFT_0; - dw5 |= GEN6_WM_16_DISPATCH_ENABLE; - dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */ - } - - if (params->src.mt) { - dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */ - dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ - } - - if (params->dst.num_samples > 1) { - dw6 |= GEN6_WM_MSRAST_ON_PATTERN; - if (prog_data && prog_data->persample_msaa_dispatch) - dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; - else - dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL; - } else { - dw6 |= GEN6_WM_MSRAST_OFF_PIXEL; - dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE; - } - - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2)); - OUT_BATCH(params->wm_prog_kernel); - OUT_BATCH(dw2); - OUT_BATCH(0); /* No scratch needed */ - OUT_BATCH(dw4); - OUT_BATCH(dw5); - OUT_BATCH(dw6); - OUT_BATCH(0); /* No other programs */ - OUT_BATCH(0); /* No other programs */ - ADVANCE_BATCH(); -} - - -static void -gen6_blorp_emit_constant_ps(struct brw_context *brw, - const struct brw_blorp_params *params, - uint32_t wm_push_const_offset) -{ - /* Make sure the push constants fill an exact integer number of - * registers. 
- */ - assert(sizeof(struct brw_blorp_wm_push_constants) % 32 == 0); - - /* There must be at least one register worth of push constant data. */ - assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0); - - /* Enable push constant buffer 0. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | - GEN6_CONSTANT_BUFFER_0_ENABLE | - (5 - 2)); - OUT_BATCH(wm_push_const_offset + (BRW_BLORP_NUM_PUSH_CONST_REGS - 1)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen6_blorp_emit_constant_ps_disable(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* Disable the push constant buffers. */ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/** - * 3DSTATE_BINDING_TABLE_POINTERS - */ -static void -gen6_blorp_emit_binding_table_pointers(struct brw_context *brw, - uint32_t wm_bind_bo_offset) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | - GEN6_BINDING_TABLE_MODIFY_PS | - (4 - 2)); - OUT_BATCH(0); /* vs -- ignored */ - OUT_BATCH(0); /* gs -- ignored */ - OUT_BATCH(wm_bind_bo_offset); /* wm/ps */ - ADVANCE_BATCH(); -} - - -static void -gen6_blorp_emit_depth_stencil_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t surfwidth, surfheight; - uint32_t surftype; - unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1); - GLenum gl_target = params->depth.mt->target; - unsigned int lod; - - switch (gl_target) { - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_CUBE_MAP: - /* The PRM claims that we should use BRW_SURFACE_CUBE for this - * situation, but experiments show that gl_Layer doesn't work when we do - * this. So we use BRW_SURFACE_2D, since for rendering purposes this is - * equivalent. 
- */ - surftype = BRW_SURFACE_2D; - depth *= 6; - break; - default: - surftype = translate_tex_target(gl_target); - break; - } - - const unsigned min_array_element = params->depth.layer; - - lod = params->depth.level - params->depth.mt->first_level; - - if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { - /* HIZ ops for lod 0 may set the width & height a little - * larger to allow the fast depth clear to fit the hardware - * alignment requirements. (8x4) - */ - surfwidth = params->depth.width; - surfheight = params->depth.height; - } else { - surfwidth = params->depth.mt->logical_width0; - surfheight = params->depth.mt->logical_height0; - } - - /* 3DSTATE_DEPTH_BUFFER */ - { - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(7); - /* 3DSTATE_DEPTH_BUFFER dw0 */ - OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - - /* 3DSTATE_DEPTH_BUFFER dw1 */ - OUT_BATCH((params->depth.mt->pitch - 1) | - params->depth_format << 18 | - 1 << 21 | /* separate stencil enable */ - 1 << 22 | /* hiz enable */ - BRW_TILEWALK_YMAJOR << 26 | - 1 << 27 | /* y-tiled */ - surftype << 29); - - /* 3DSTATE_DEPTH_BUFFER dw2 */ - OUT_RELOC(params->depth.mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - - /* 3DSTATE_DEPTH_BUFFER dw3 */ - OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 | - (surfwidth - 1) << 6 | - (surfheight - 1) << 19 | - lod << 2); - - /* 3DSTATE_DEPTH_BUFFER dw4 */ - OUT_BATCH((depth - 1) << 21 | - min_array_element << 10 | - (depth - 1) << 1); - - /* 3DSTATE_DEPTH_BUFFER dw5 */ - OUT_BATCH(0); - - /* 3DSTATE_DEPTH_BUFFER dw6 */ - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HIER_DEPTH_BUFFER */ - { - struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_buf->mt; - uint32_t offset = 0; - - if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - offset = intel_miptree_get_aligned_offset(hiz_mt, - hiz_mt->level[lod].level_x, - hiz_mt->level[lod].level_y, - false); - } - - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - 
OUT_BATCH(hiz_mt->pitch - 1); - OUT_RELOC(hiz_mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - offset); - ADVANCE_BATCH(); - } - - /* 3DSTATE_STENCIL_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - - -static void -gen6_blorp_emit_depth_disable(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | - (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_CLEAR_PARAMS - * - * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS: - * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE - * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes. - */ -static void -gen6_blorp_emit_clear_params(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 | - GEN5_DEPTH_CLEAR_VALID | - (2 - 2)); - OUT_BATCH(params->depth.mt ? 
params->depth.mt->depth_clear_value : 0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_DRAWING_RECTANGLE */ -void -gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | - ((MAX2(params->y1, params->y0) - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_VIEWPORT_STATE_POINTERS */ -static void -gen6_blorp_emit_viewport_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - struct brw_cc_viewport *ccv; - uint32_t cc_vp_offset; - - ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, - sizeof(*ccv), 32, - &cc_vp_offset); - - ccv->min_depth = 0.0; - ccv->max_depth = 1.0; - - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) | - GEN6_CC_VIEWPORT_MODIFY); - OUT_BATCH(0); /* clip VP */ - OUT_BATCH(0); /* SF VP */ - OUT_BATCH(cc_vp_offset); - ADVANCE_BATCH(); -} - - -/* 3DPRIMITIVE */ -static void -gen6_blorp_emit_primitive(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(6); - OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) | - _3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT | - GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(params->num_layers); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/** - * \brief Execute a blit or render pass operation. - * - * To execute the operation, this function manually constructs and emits a - * batch to draw a rectangle primitive. The batchbuffer is flushed before - * constructing and after emitting the batch. - * - * This function alters no GL state. 
- */ -void -gen6_blorp_exec(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t cc_blend_state_offset = 0; - uint32_t cc_state_offset = 0; - uint32_t depthstencil_offset; - uint32_t wm_push_const_offset = 0; - uint32_t wm_bind_bo_offset = 0; - - /* Emit workaround flushes when we switch from drawing to blorping. */ - brw_emit_post_sync_nonzero_flush(brw); - - if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) - brw_state_base_address.emit(brw); - - gen6_emit_3dstate_multisample(brw, params->dst.num_samples); - gen6_emit_3dstate_sample_mask(brw, - params->dst.num_samples > 1 ? - (1 << params->dst.num_samples) - 1 : 1); - gen6_blorp_emit_vertices(brw, params); - gen6_blorp_emit_urb_config(brw, params); - if (params->wm_prog_data) { - cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); - cc_state_offset = gen6_blorp_emit_cc_state(brw); - } - depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); - gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset, - depthstencil_offset, cc_state_offset); - if (params->wm_prog_data) { - uint32_t wm_surf_offset_renderbuffer; - uint32_t wm_surf_offset_texture = 0; - wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); - intel_miptree_used_for_rendering(params->dst.mt); - wm_surf_offset_renderbuffer = - gen6_blorp_emit_surface_state(brw, params, &params->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER); - if (params->src.mt) { - wm_surf_offset_texture = - gen6_blorp_emit_surface_state(brw, params, &params->src, - I915_GEM_DOMAIN_SAMPLER, 0); - } - wm_bind_bo_offset = - gen6_blorp_emit_binding_table(brw, - wm_surf_offset_renderbuffer, - wm_surf_offset_texture); - } - - if (params->src.mt) { - const uint32_t sampler_offset = - gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); - gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset); - } - gen6_blorp_emit_vs_disable(brw, params); - gen6_blorp_emit_gs_disable(brw,
params); - gen6_blorp_emit_clip_disable(brw); - gen6_blorp_emit_sf_config(brw, params); - if (params->wm_prog_data) - gen6_blorp_emit_constant_ps(brw, params, wm_push_const_offset); - else - gen6_blorp_emit_constant_ps_disable(brw, params); - gen6_blorp_emit_wm_config(brw, params); - if (params->wm_prog_data) - gen6_blorp_emit_binding_table_pointers(brw, wm_bind_bo_offset); - gen6_blorp_emit_viewport_state(brw, params); - - if (params->depth.mt) - gen6_blorp_emit_depth_stencil_config(brw, params); - else - gen6_blorp_emit_depth_disable(brw, params); - gen6_blorp_emit_clear_params(brw, params); - gen6_blorp_emit_drawing_rectangle(brw, params); - gen6_blorp_emit_primitive(brw, params); -} - diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c new file mode 100644 index 00000000000..e2e6072410c --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -0,0 +1,885 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_state.h" + +#include "brw_blorp.h" + +static bool +gen7_blorp_skip_urb_config(const struct brw_context *brw) +{ + if (brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) + return false; + + /* Vertex buffer takes 24 bytes. As the size is expressed in 64 bytes, + * one will suffice, otherwise the setup can be any valid configuration. + */ + return brw->urb.vsize > 0; +} + +/* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, than the others must be also. + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: + * + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. + */ +void +gen7_blorp_emit_urb_config(struct brw_context *brw) +{ + /* URB allocations must be done in 8k chunks. */ + const unsigned chunk_size_bytes = 8192; + const unsigned urb_size = + (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 
32 : 16; + const unsigned push_constant_bytes = 1024 * urb_size; + const unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + const unsigned vs_size = 1; + const unsigned vs_start = push_constant_chunks; + const unsigned vs_chunks = + DIV_ROUND_UP(brw->urb.min_vs_entries * vs_size * 64, chunk_size_bytes); + + if (gen7_blorp_skip_urb_config(brw)) + return; + + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; + + gen7_emit_push_constant_state(brw, + urb_size / 2 /* vs_size */, + 0 /* hs_size */, + 0 /* ds_size */, + 0 /* gs_size */, + urb_size / 2 /* fs_size */); + + gen7_emit_urb_state(brw, + brw->urb.min_vs_entries /* num_vs_entries */, + vs_size, + vs_start, + 0 /* num_hs_entries */, + 1 /* hs_size */, + vs_start + vs_chunks /* hs_start */, + 0 /* num_ds_entries */, + 1 /* ds_size */, + vs_start + vs_chunks /* ds_start */, + 0 /* num_gs_entries */, + 1 /* gs_size */, + vs_start + vs_chunks /* gs_start */); +} + + +/* 3DSTATE_BLEND_STATE_POINTERS */ +void +gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, + uint32_t cc_blend_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_blend_state_offset | 1); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CC_STATE_POINTERS */ +void +gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, + uint32_t cc_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_state_offset | 1); + ADVANCE_BATCH(); +} + +void +gen7_blorp_emit_cc_viewport(struct brw_context *brw) +{ + struct brw_cc_viewport *ccv; + uint32_t cc_vp_offset; + + ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, + &cc_vp_offset); + ccv->min_depth = 0.0; + ccv->max_depth = 1.0; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); + OUT_BATCH(cc_vp_offset); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS + * + * The offset is relative to 
CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +static void +gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, + uint32_t depthstencil_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(depthstencil_offset | 1); + ADVANCE_BATCH(); +} + + +/* SURFACE_STATE for renderbuffer or texture surface (see + * brw_update_renderbuffer_surface and brw_update_texture_surface) + */ +static uint32_t +gen7_blorp_emit_surface_state(struct brw_context *brw, + const struct brw_blorp_surface_info *surface, + uint32_t read_domains, uint32_t write_domain, + bool is_render_target) +{ + uint32_t wm_surf_offset; + uint32_t width = surface->width; + uint32_t height = surface->height; + /* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for + * color surfaces, width and height are measured in pixels; we don't need + * to divide them by 2 as we do for Gen6 (see + * gen6_blorp_emit_surface_state). + */ + struct intel_mipmap_tree *mt = surface->mt; + uint32_t tile_x, tile_y; + const uint8_t mocs = GEN7_MOCS_L3; + + uint32_t tiling = surface->map_stencil_as_y_tiled + ? I915_TILING_Y : mt->tiling; + + uint32_t *surf = (uint32_t *) + brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset); + memset(surf, 0, 8 * 4); + + surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | + surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | + gen7_surface_tiling_mode(tiling); + + if (surface->mt->valign == 4) + surf[0] |= GEN7_SURFACE_VALIGN_4; + if (surface->mt->halign == 8) + surf[0] |= GEN7_SURFACE_HALIGN_8; + + if (surface->array_layout == ALL_SLICES_AT_EACH_LOD) + surf[0] |= GEN7_SURFACE_ARYSPC_LOD0; + else + surf[0] |= GEN7_SURFACE_ARYSPC_FULL; + + /* reloc */ + surf[1] = brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) + + mt->bo->offset64; + + /* Note that the low bits of these fields are missing, so + * there's the possibility of getting in trouble. 
+ */ + assert(tile_x % 4 == 0); + assert(tile_y % 2 == 0); + surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | + SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) | + SET_FIELD(mocs, GEN7_SURFACE_MOCS); + + surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) | + SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT); + + uint32_t pitch_bytes = mt->pitch; + if (surface->map_stencil_as_y_tiled) + pitch_bytes *= 2; + surf[3] = pitch_bytes - 1; + + surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout); + if (surface->mt->mcs_mt) { + gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt, + is_render_target); + } + + surf[7] = surface->mt->fast_clear_color_value; + + if (brw->is_haswell) { + surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); + } + + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(brw->batch.bo, + wm_surf_offset + 4, + mt->bo, + surf[1] - mt->bo->offset64, + read_domains, write_domain); + + gen7_check_surface_setup(surf, is_render_target); + + return wm_surf_offset; +} + + +/* 3DSTATE_VS + * + * Disable vertex shader. + */ +static void +gen7_blorp_emit_vs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_HS + * + * Disable the hull shader. 
+ */ +static void +gen7_blorp_emit_hs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_TE + * + * Disable the tesselation engine. + */ +void +gen7_blorp_emit_te_disable(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_DS + * + * Disable the domain shader. + */ +static void +gen7_blorp_emit_ds_disable(struct brw_context *brw) +{ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(6); + OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_GS + * + * Disable the geometry shader. + */ +static void +gen7_blorp_emit_gs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + /** + * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > + * Geometry > Geometry Shader > State: + * + * "Note: Because of corruption in IVB:GT2, software needs to flush the + * whole fixed function pipeline when the GS enable changes value in + * the 3DSTATE_GS." + * + * The hardware architects have clarified that in this context "flush the + * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS + * Stall" bit set. 
+ */ + if (brw->gen < 8 && !brw->is_haswell && brw->gt == 2 && brw->gs.enabled) + gen7_emit_cs_stall_flush(brw); + + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + brw->gs.enabled = false; +} + +/* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ +static void +gen7_blorp_emit_streamout_disable(struct brw_context *brw) +{ + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +static void +gen7_blorp_emit_sf_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + /* 3DSTATE_SF + * + * Disable ViewportTransformEnable (dw1.1) + * + * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D + * Primitives Overview": + * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the + * use of screen- space coordinates). + * + * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) + * and BackFaceFillMode (dw1.4:3) to SOLID(0). + * + * From the Sandy Bridge PRM, Volume 2, Part 1, Section + * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: + * SOLID: Any triangle or rectangle object found to be front-facing + * is rendered as a solid object. This setting is required when + * (rendering rectangle (RECTLIST) objects. + */ + { + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); + OUT_BATCH(params->depth_format << + GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); + OUT_BATCH(params->dst.num_samples > 1 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_SBE */ + { + BEGIN_BATCH(14); + OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); + OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | + params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + BRW_SF_URB_ENTRY_READ_OFFSET << + GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); + for (int i = 0; i < 12; ++i) + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + + +/** + * Disable thread dispatch (dw5.19) and enable the HiZ op. + */ +static void +gen7_blorp_emit_wm_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + uint32_t dw1 = 0, dw2 = 0; + + switch (params->hiz_op) { + case GEN6_HIZ_OP_DEPTH_CLEAR: + dw1 |= GEN7_WM_DEPTH_CLEAR; + break; + case GEN6_HIZ_OP_DEPTH_RESOLVE: + dw1 |= GEN7_WM_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_HIZ_RESOLVE: + dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; + break; + case GEN6_HIZ_OP_NONE: + break; + default: + unreachable("not reached"); + } + dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; + dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; + dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ + + if (params->wm_prog_data) + dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ + + if (params->src.mt) + dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */ + + if (params->dst.num_samples > 1) { + dw1 |= GEN7_WM_MSRAST_ON_PATTERN; + if (prog_data && prog_data->persample_msaa_dispatch) + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; + else + dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; + } else { + dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; + dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; + } + + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); + OUT_BATCH(dw1); + OUT_BATCH(dw2); + ADVANCE_BATCH(); +} + + +/** + * 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. 
Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the + * valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. + */ +static void +gen7_blorp_emit_ps_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + uint32_t dw2, dw4, dw5; + const int max_threads_shift = brw->is_haswell ? + HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; + + dw2 = dw4 = dw5 = 0; + dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; + + /* If there's a WM program, we need to do 16-pixel dispatch since that's + * what the program is compiled for. If there isn't, then it shouldn't + * matter because no program is actually being run. However, the hardware + * gets angry if we don't enable at least one dispatch mode, so just enable + * 16-pixel dispatch unconditionally. 
+ */ + dw4 |= GEN7_PS_16_DISPATCH_ENABLE; + + if (brw->is_haswell) + dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ + if (params->wm_prog_data) { + dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; + dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + } + + if (params->src.mt) + dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ + + dw4 |= params->fast_clear_op; + + BEGIN_BATCH(8); + OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); + OUT_BATCH(params->wm_prog_kernel); + OUT_BATCH(dw2); + OUT_BATCH(0); + OUT_BATCH(dw4); + OUT_BATCH(dw5); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, + uint32_t wm_bind_bo_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(wm_bind_bo_offset); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, + uint32_t sampler_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(sampler_offset); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_constant_ps(struct brw_context *brw, + uint32_t wm_push_const_offset) +{ + const uint8_t mocs = GEN7_MOCS_L3; + + /* Make sure the push constants fill an exact integer number of + * registers. + */ + assert(sizeof(struct brw_blorp_wm_push_constants) % 32 == 0); + + /* There must be at least one register worth of push constant data. */ + assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0); + + /* Enable push constant buffer 0. 
*/ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | + (7 - 2)); + OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); + OUT_BATCH(0); + OUT_BATCH(wm_push_const_offset | mocs); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +void +gen7_blorp_emit_constant_ps_disable(struct brw_context *brw) +{ + BEGIN_BATCH(7); + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const uint8_t mocs = GEN7_MOCS_L3; + uint32_t surfwidth, surfheight; + uint32_t surftype; + unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1); + unsigned int min_array_element; + GLenum gl_target = params->depth.mt->target; + unsigned int lod; + + switch (gl_target) { + case GL_TEXTURE_CUBE_MAP_ARRAY: + case GL_TEXTURE_CUBE_MAP: + /* The PRM claims that we should use BRW_SURFACE_CUBE for this + * situation, but experiments show that gl_Layer doesn't work when we do + * this. So we use BRW_SURFACE_2D, since for rendering purposes this is + * equivalent. + */ + surftype = BRW_SURFACE_2D; + depth *= 6; + break; + default: + surftype = translate_tex_target(gl_target); + break; + } + + min_array_element = params->depth.layer; + if (params->depth.mt->num_samples > 1) { + /* Convert physical layer to logical layer. */ + min_array_element /= params->depth.mt->num_samples; + } + + lod = params->depth.level - params->depth.mt->first_level; + + if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { + /* HIZ ops for lod 0 may set the width & height a little + * larger to allow the fast depth clear to fit the hardware + * alignment requirements. 
(8x4) + */ + surfwidth = params->depth.width; + surfheight = params->depth.height; + } else { + surfwidth = params->depth.mt->logical_width0; + surfheight = params->depth.mt->logical_height0; + } + + /* 3DSTATE_DEPTH_BUFFER */ + { + brw_emit_depth_stall_flushes(brw); + + BEGIN_BATCH(7); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH((params->depth.mt->pitch - 1) | + params->depth_format << 18 | + 1 << 22 | /* hiz enable */ + 1 << 28 | /* depth write */ + surftype << 29); + OUT_RELOC(params->depth.mt->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + OUT_BATCH((surfwidth - 1) << 4 | + (surfheight - 1) << 18 | + lod); + OUT_BATCH(((depth - 1) << 21) | + (min_array_element << 10) | + mocs); + OUT_BATCH(0); + OUT_BATCH((depth - 1) << 21); + ADVANCE_BATCH(); + } + + /* 3DSTATE_HIER_DEPTH_BUFFER */ + { + struct intel_miptree_aux_buffer *hiz_buf = params->depth.mt->hiz_buf; + + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); + OUT_BATCH((mocs << 25) | + (hiz_buf->pitch - 1)); + OUT_RELOC(hiz_buf->bo, + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + 0); + ADVANCE_BATCH(); + } + + /* 3DSTATE_STENCIL_BUFFER */ + { + BEGIN_BATCH(3); + OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + + +static void +gen7_blorp_emit_depth_disable(struct brw_context *brw) +{ + brw_emit_depth_stall_flushes(brw); + + BEGIN_BATCH(7); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); + OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CLEAR_PARAMS + * + * From the 
Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 + * 3DSTATE_CLEAR_PARAMS: + * 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ +void +gen7_blorp_emit_clear_params(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0); + OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); + ADVANCE_BATCH(); +} + + +/* 3DPRIMITIVE */ +void +gen7_blorp_emit_primitive(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(params->num_layers); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/** + * \copydoc gen6_blorp_exec() + */ +void +gen7_blorp_exec(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + if (brw->gen >= 8) + return; + + uint32_t cc_blend_state_offset = 0; + uint32_t cc_state_offset = 0; + uint32_t depthstencil_offset; + uint32_t wm_push_const_offset = 0; + uint32_t wm_bind_bo_offset = 0; + + if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) + brw_state_base_address.emit(brw); + + gen6_emit_3dstate_multisample(brw, params->dst.num_samples); + gen6_emit_3dstate_sample_mask(brw, + params->dst.num_samples > 1 ? 
+ (1 << params->dst.num_samples) - 1 : 1); + gen6_blorp_emit_vertices(brw, params); + gen7_blorp_emit_urb_config(brw); + if (params->wm_prog_data) { + cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); + cc_state_offset = gen6_blorp_emit_cc_state(brw); + gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); + gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); + } + depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); + gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset); + if (brw->use_resource_streamer) + gen7_disable_hw_binding_tables(brw); + if (params->wm_prog_data) { + uint32_t wm_surf_offset_renderbuffer; + uint32_t wm_surf_offset_texture = 0; + wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); + intel_miptree_used_for_rendering(params->dst.mt); + wm_surf_offset_renderbuffer = + gen7_blorp_emit_surface_state(brw, &params->dst, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + true /* is_render_target */); + if (params->src.mt) { + wm_surf_offset_texture = + gen7_blorp_emit_surface_state(brw, &params->src, + I915_GEM_DOMAIN_SAMPLER, 0, + false /* is_render_target */); + } + wm_bind_bo_offset = + gen6_blorp_emit_binding_table(brw, + wm_surf_offset_renderbuffer, + wm_surf_offset_texture); + } + gen7_blorp_emit_vs_disable(brw); + gen7_blorp_emit_hs_disable(brw); + gen7_blorp_emit_te_disable(brw); + gen7_blorp_emit_ds_disable(brw); + gen7_blorp_emit_gs_disable(brw); + gen7_blorp_emit_streamout_disable(brw); + gen6_blorp_emit_clip_disable(brw); + gen7_blorp_emit_sf_config(brw, params); + gen7_blorp_emit_wm_config(brw, params); + if (params->wm_prog_data) { + gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); + gen7_blorp_emit_constant_ps(brw, wm_push_const_offset); + } else { + gen7_blorp_emit_constant_ps_disable(brw); + } + + if (params->src.mt) { + const uint32_t sampler_offset = + gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); + 
gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); + } + + gen7_blorp_emit_ps_config(brw, params); + gen7_blorp_emit_cc_viewport(brw); + + if (params->depth.mt) + gen7_blorp_emit_depth_stencil_config(brw, params); + else + gen7_blorp_emit_depth_disable(brw); + gen7_blorp_emit_clear_params(brw, params); + gen6_blorp_emit_drawing_rectangle(brw, params); + gen7_blorp_emit_primitive(brw, params); +} diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp deleted file mode 100644 index e2e6072410c..00000000000 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ /dev/null @@ -1,885 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#include <assert.h> - -#include "intel_batchbuffer.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_state.h" - -#include "brw_blorp.h" - -static bool -gen7_blorp_skip_urb_config(const struct brw_context *brw) -{ - if (brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) - return false; - - /* Vertex buffer takes 24 bytes. As the size is expressed in 64 bytes, - * one will suffice, otherwise the setup can be any valid configuration. - */ - return brw->urb.vsize > 0; -} - -/* 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. - * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: - * - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ -void -gen7_blorp_emit_urb_config(struct brw_context *brw) -{ - /* URB allocations must be done in 8k chunks. */ - const unsigned chunk_size_bytes = 8192; - const unsigned urb_size = - (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 
32 : 16; - const unsigned push_constant_bytes = 1024 * urb_size; - const unsigned push_constant_chunks = - push_constant_bytes / chunk_size_bytes; - const unsigned vs_size = 1; - const unsigned vs_start = push_constant_chunks; - const unsigned vs_chunks = - DIV_ROUND_UP(brw->urb.min_vs_entries * vs_size * 64, chunk_size_bytes); - - if (gen7_blorp_skip_urb_config(brw)) - return; - - brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; - - gen7_emit_push_constant_state(brw, - urb_size / 2 /* vs_size */, - 0 /* hs_size */, - 0 /* ds_size */, - 0 /* gs_size */, - urb_size / 2 /* fs_size */); - - gen7_emit_urb_state(brw, - brw->urb.min_vs_entries /* num_vs_entries */, - vs_size, - vs_start, - 0 /* num_hs_entries */, - 1 /* hs_size */, - vs_start + vs_chunks /* hs_start */, - 0 /* num_ds_entries */, - 1 /* ds_size */, - vs_start + vs_chunks /* ds_start */, - 0 /* num_gs_entries */, - 1 /* gs_size */, - vs_start + vs_chunks /* gs_start */); -} - - -/* 3DSTATE_BLEND_STATE_POINTERS */ -void -gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, - uint32_t cc_blend_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_blend_state_offset | 1); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_CC_STATE_POINTERS */ -void -gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, - uint32_t cc_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_state_offset | 1); - ADVANCE_BATCH(); -} - -void -gen7_blorp_emit_cc_viewport(struct brw_context *brw) -{ - struct brw_cc_viewport *ccv; - uint32_t cc_vp_offset; - - ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, - sizeof(*ccv), 32, - &cc_vp_offset); - ccv->min_depth = 0.0; - ccv->max_depth = 1.0; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); - OUT_BATCH(cc_vp_offset); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS - * - * The offset is relative to 
CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ -static void -gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context *brw, - uint32_t depthstencil_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(depthstencil_offset | 1); - ADVANCE_BATCH(); -} - - -/* SURFACE_STATE for renderbuffer or texture surface (see - * brw_update_renderbuffer_surface and brw_update_texture_surface) - */ -static uint32_t -gen7_blorp_emit_surface_state(struct brw_context *brw, - const struct brw_blorp_surface_info *surface, - uint32_t read_domains, uint32_t write_domain, - bool is_render_target) -{ - uint32_t wm_surf_offset; - uint32_t width = surface->width; - uint32_t height = surface->height; - /* Note: since gen7 uses INTEL_MSAA_LAYOUT_CMS or INTEL_MSAA_LAYOUT_UMS for - * color surfaces, width and height are measured in pixels; we don't need - * to divide them by 2 as we do for Gen6 (see - * gen6_blorp_emit_surface_state). - */ - struct intel_mipmap_tree *mt = surface->mt; - uint32_t tile_x, tile_y; - const uint8_t mocs = GEN7_MOCS_L3; - - uint32_t tiling = surface->map_stencil_as_y_tiled - ? I915_TILING_Y : mt->tiling; - - uint32_t *surf = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &wm_surf_offset); - memset(surf, 0, 8 * 4); - - surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | - gen7_surface_tiling_mode(tiling); - - if (surface->mt->valign == 4) - surf[0] |= GEN7_SURFACE_VALIGN_4; - if (surface->mt->halign == 8) - surf[0] |= GEN7_SURFACE_HALIGN_8; - - if (surface->array_layout == ALL_SLICES_AT_EACH_LOD) - surf[0] |= GEN7_SURFACE_ARYSPC_LOD0; - else - surf[0] |= GEN7_SURFACE_ARYSPC_FULL; - - /* reloc */ - surf[1] = brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) + - mt->bo->offset64; - - /* Note that the low bits of these fields are missing, so - * there's the possibility of getting in trouble. 
- */ - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 2, BRW_SURFACE_Y_OFFSET) | - SET_FIELD(mocs, GEN7_SURFACE_MOCS); - - surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) | - SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT); - - uint32_t pitch_bytes = mt->pitch; - if (surface->map_stencil_as_y_tiled) - pitch_bytes *= 2; - surf[3] = pitch_bytes - 1; - - surf[4] = gen7_surface_msaa_bits(surface->num_samples, surface->msaa_layout); - if (surface->mt->mcs_mt) { - gen7_set_surface_mcs_info(brw, surf, wm_surf_offset, surface->mt->mcs_mt, - is_render_target); - } - - surf[7] = surface->mt->fast_clear_color_value; - - if (brw->is_haswell) { - surf[7] |= (SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A)); - } - - /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(brw->batch.bo, - wm_surf_offset + 4, - mt->bo, - surf[1] - mt->bo->offset64, - read_domains, write_domain); - - gen7_check_surface_setup(surf, is_render_target); - - return wm_surf_offset; -} - - -/* 3DSTATE_VS - * - * Disable vertex shader. - */ -static void -gen7_blorp_emit_vs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_HS - * - * Disable the hull shader. 
- */ -static void -gen7_blorp_emit_hs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_HS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_TE - * - * Disable the tesselation engine. - */ -void -gen7_blorp_emit_te_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_DS - * - * Disable the domain shader. - */ -static void -gen7_blorp_emit_ds_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_DS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(6); - OUT_BATCH(_3DSTATE_DS << 16 | (6 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_GS - * - * Disable the geometry shader. - */ -static void -gen7_blorp_emit_gs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - /** - * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > - * Geometry > Geometry Shader > State: - * - * "Note: Because of corruption in IVB:GT2, software needs to flush the - * whole fixed function pipeline when the GS enable changes value in - * the 3DSTATE_GS." - * - * The hardware architects have clarified that in this context "flush the - * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS - * Stall" bit set. 
- */ - if (brw->gen < 8 && !brw->is_haswell && brw->gt == 2 && brw->gs.enabled) - gen7_emit_cs_stall_flush(brw); - - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - brw->gs.enabled = false; -} - -/* 3DSTATE_STREAMOUT - * - * Disable streamout. - */ -static void -gen7_blorp_emit_streamout_disable(struct brw_context *brw) -{ - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -static void -gen7_blorp_emit_sf_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* 3DSTATE_SF - * - * Disable ViewportTransformEnable (dw1.1) - * - * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D - * Primitives Overview": - * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the - * use of screen- space coordinates). - * - * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5) - * and BackFaceFillMode (dw1.4:3) to SOLID(0). - * - * From the Sandy Bridge PRM, Volume 2, Part 1, Section - * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode: - * SOLID: Any triangle or rectangle object found to be front-facing - * is rendered as a solid object. This setting is required when - * (rendering rectangle (RECTLIST) objects. - */ - { - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_SF << 16 | (7 - 2)); - OUT_BATCH(params->depth_format << - GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT); - OUT_BATCH(params->dst.num_samples > 1 ? 
GEN6_SF_MSRAST_ON_PATTERN : 0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_SBE */ - { - BEGIN_BATCH(14); - OUT_BATCH(_3DSTATE_SBE << 16 | (14 - 2)); - OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | - params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | - 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << - GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT); - for (int i = 0; i < 12; ++i) - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - - -/** - * Disable thread dispatch (dw5.19) and enable the HiZ op. - */ -static void -gen7_blorp_emit_wm_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw1 = 0, dw2 = 0; - - switch (params->hiz_op) { - case GEN6_HIZ_OP_DEPTH_CLEAR: - dw1 |= GEN7_WM_DEPTH_CLEAR; - break; - case GEN6_HIZ_OP_DEPTH_RESOLVE: - dw1 |= GEN7_WM_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_HIZ_RESOLVE: - dw1 |= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; - break; - case GEN6_HIZ_OP_NONE: - break; - default: - unreachable("not reached"); - } - dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; - dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; - dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ - - if (params->wm_prog_data) - dw1 |= GEN7_WM_DISPATCH_ENABLE; /* We are rendering */ - - if (params->src.mt) - dw1 |= GEN7_WM_KILL_ENABLE; /* TODO: temporarily smash on */ - - if (params->dst.num_samples > 1) { - dw1 |= GEN7_WM_MSRAST_ON_PATTERN; - if (prog_data && prog_data->persample_msaa_dispatch) - dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; - else - dw2 |= GEN7_WM_MSDISPMODE_PERPIXEL; - } else { - dw1 |= GEN7_WM_MSRAST_OFF_PIXEL; - dw2 |= GEN7_WM_MSDISPMODE_PERSAMPLE; - } - - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_WM << 16 | (3 - 2)); - OUT_BATCH(dw1); - OUT_BATCH(dw2); - ADVANCE_BATCH(); -} - - -/** - * 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. 
Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the - * valid range for this field is [0x3, 0x2f]. - * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. - */ -static void -gen7_blorp_emit_ps_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw2, dw4, dw5; - const int max_threads_shift = brw->is_haswell ? - HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT; - - dw2 = dw4 = dw5 = 0; - dw4 |= (brw->max_wm_threads - 1) << max_threads_shift; - - /* If there's a WM program, we need to do 16-pixel dispatch since that's - * what the program is compiled for. If there isn't, then it shouldn't - * matter because no program is actually being run. However, the hardware - * gets angry if we don't enable at least one dispatch mode, so just enable - * 16-pixel dispatch unconditionally. 
- */ - dw4 |= GEN7_PS_16_DISPATCH_ENABLE; - - if (brw->is_haswell) - dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */ - if (params->wm_prog_data) { - dw4 |= GEN7_PS_PUSH_CONSTANT_ENABLE; - dw5 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; - } - - if (params->src.mt) - dw2 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ - - dw4 |= params->fast_clear_op; - - BEGIN_BATCH(8); - OUT_BATCH(_3DSTATE_PS << 16 | (8 - 2)); - OUT_BATCH(params->wm_prog_kernel); - OUT_BATCH(dw2); - OUT_BATCH(0); - OUT_BATCH(dw4); - OUT_BATCH(dw5); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -void -gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, - uint32_t wm_bind_bo_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(wm_bind_bo_offset); - ADVANCE_BATCH(); -} - - -void -gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, - uint32_t sampler_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(sampler_offset); - ADVANCE_BATCH(); -} - - -void -gen7_blorp_emit_constant_ps(struct brw_context *brw, - uint32_t wm_push_const_offset) -{ - const uint8_t mocs = GEN7_MOCS_L3; - - /* Make sure the push constants fill an exact integer number of - * registers. - */ - assert(sizeof(struct brw_blorp_wm_push_constants) % 32 == 0); - - /* There must be at least one register worth of push constant data. */ - assert(BRW_BLORP_NUM_PUSH_CONST_REGS > 0); - - /* Enable push constant buffer 0. 
*/ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | - (7 - 2)); - OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); - OUT_BATCH(0); - OUT_BATCH(wm_push_const_offset | mocs); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -void -gen7_blorp_emit_constant_ps_disable(struct brw_context *brw) -{ - BEGIN_BATCH(7); - OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (7 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const uint8_t mocs = GEN7_MOCS_L3; - uint32_t surfwidth, surfheight; - uint32_t surftype; - unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1); - unsigned int min_array_element; - GLenum gl_target = params->depth.mt->target; - unsigned int lod; - - switch (gl_target) { - case GL_TEXTURE_CUBE_MAP_ARRAY: - case GL_TEXTURE_CUBE_MAP: - /* The PRM claims that we should use BRW_SURFACE_CUBE for this - * situation, but experiments show that gl_Layer doesn't work when we do - * this. So we use BRW_SURFACE_2D, since for rendering purposes this is - * equivalent. - */ - surftype = BRW_SURFACE_2D; - depth *= 6; - break; - default: - surftype = translate_tex_target(gl_target); - break; - } - - min_array_element = params->depth.layer; - if (params->depth.mt->num_samples > 1) { - /* Convert physical layer to logical layer. */ - min_array_element /= params->depth.mt->num_samples; - } - - lod = params->depth.level - params->depth.mt->first_level; - - if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) { - /* HIZ ops for lod 0 may set the width & height a little - * larger to allow the fast depth clear to fit the hardware - * alignment requirements. 
(8x4) - */ - surfwidth = params->depth.width; - surfheight = params->depth.height; - } else { - surfwidth = params->depth.mt->logical_width0; - surfheight = params->depth.mt->logical_height0; - } - - /* 3DSTATE_DEPTH_BUFFER */ - { - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(7); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH((params->depth.mt->pitch - 1) | - params->depth_format << 18 | - 1 << 22 | /* hiz enable */ - 1 << 28 | /* depth write */ - surftype << 29); - OUT_RELOC(params->depth.mt->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - OUT_BATCH((surfwidth - 1) << 4 | - (surfheight - 1) << 18 | - lod); - OUT_BATCH(((depth - 1) << 21) | - (min_array_element << 10) | - mocs); - OUT_BATCH(0); - OUT_BATCH((depth - 1) << 21); - ADVANCE_BATCH(); - } - - /* 3DSTATE_HIER_DEPTH_BUFFER */ - { - struct intel_miptree_aux_buffer *hiz_buf = params->depth.mt->hiz_buf; - - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2)); - OUT_BATCH((mocs << 25) | - (hiz_buf->pitch - 1)); - OUT_RELOC(hiz_buf->bo, - I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, - 0); - ADVANCE_BATCH(); - } - - /* 3DSTATE_STENCIL_BUFFER */ - { - BEGIN_BATCH(3); - OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - - -static void -gen7_blorp_emit_depth_disable(struct brw_context *brw) -{ - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(7); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2)); - OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT << 18 | (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_CLEAR_PARAMS - * - * From the 
Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 - * 3DSTATE_CLEAR_PARAMS: - * 3DSTATE_CLEAR_PARAMS must always be programmed in the along - * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, - * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). - */ -void -gen7_blorp_emit_clear_params(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); - OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0); - OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); - ADVANCE_BATCH(); -} - - -/* 3DPRIMITIVE */ -void -gen7_blorp_emit_primitive(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | - _3DPRIM_RECTLIST); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(params->num_layers); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - -/** - * \copydoc gen6_blorp_exec() - */ -void -gen7_blorp_exec(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - if (brw->gen >= 8) - return; - - uint32_t cc_blend_state_offset = 0; - uint32_t cc_state_offset = 0; - uint32_t depthstencil_offset; - uint32_t wm_push_const_offset = 0; - uint32_t wm_bind_bo_offset = 0; - - if (brw_state_base_address.dirty.brw & brw->ctx.NewDriverState) - brw_state_base_address.emit(brw); - - gen6_emit_3dstate_multisample(brw, params->dst.num_samples); - gen6_emit_3dstate_sample_mask(brw, - params->dst.num_samples > 1 ? 
- (1 << params->dst.num_samples) - 1 : 1); - gen6_blorp_emit_vertices(brw, params); - gen7_blorp_emit_urb_config(brw); - if (params->wm_prog_data) { - cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params); - cc_state_offset = gen6_blorp_emit_cc_state(brw); - gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); - gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); - } - depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params); - gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset); - if (brw->use_resource_streamer) - gen7_disable_hw_binding_tables(brw); - if (params->wm_prog_data) { - uint32_t wm_surf_offset_renderbuffer; - uint32_t wm_surf_offset_texture = 0; - wm_push_const_offset = gen6_blorp_emit_wm_constants(brw, params); - intel_miptree_used_for_rendering(params->dst.mt); - wm_surf_offset_renderbuffer = - gen7_blorp_emit_surface_state(brw, &params->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - true /* is_render_target */); - if (params->src.mt) { - wm_surf_offset_texture = - gen7_blorp_emit_surface_state(brw, &params->src, - I915_GEM_DOMAIN_SAMPLER, 0, - false /* is_render_target */); - } - wm_bind_bo_offset = - gen6_blorp_emit_binding_table(brw, - wm_surf_offset_renderbuffer, - wm_surf_offset_texture); - } - gen7_blorp_emit_vs_disable(brw); - gen7_blorp_emit_hs_disable(brw); - gen7_blorp_emit_te_disable(brw); - gen7_blorp_emit_ds_disable(brw); - gen7_blorp_emit_gs_disable(brw); - gen7_blorp_emit_streamout_disable(brw); - gen6_blorp_emit_clip_disable(brw); - gen7_blorp_emit_sf_config(brw, params); - gen7_blorp_emit_wm_config(brw, params); - if (params->wm_prog_data) { - gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); - gen7_blorp_emit_constant_ps(brw, wm_push_const_offset); - } else { - gen7_blorp_emit_constant_ps_disable(brw); - } - - if (params->src.mt) { - const uint32_t sampler_offset = - gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); - 
gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); - } - - gen7_blorp_emit_ps_config(brw, params); - gen7_blorp_emit_cc_viewport(brw); - - if (params->depth.mt) - gen7_blorp_emit_depth_stencil_config(brw, params); - else - gen7_blorp_emit_depth_disable(brw); - gen7_blorp_emit_clear_params(brw, params); - gen6_blorp_emit_drawing_rectangle(brw, params); - gen7_blorp_emit_primitive(brw, params); -} diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c b/src/mesa/drivers/dri/i965/gen8_blorp.c new file mode 100644 index 00000000000..720f5244ca2 --- /dev/null +++ b/src/mesa/drivers/dri/i965/gen8_blorp.c @@ -0,0 +1,715 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include <assert.h> + +#include "intel_batchbuffer.h" +#include "intel_fbo.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "brw_blorp.h" + + +/* SURFACE_STATE for renderbuffer or texture surface (see + * brw_update_renderbuffer_surface and brw_update_texture_surface) + */ +static uint32_t +gen8_blorp_emit_surface_state(struct brw_context *brw, + const struct brw_blorp_surface_info *surface, + uint32_t read_domains, uint32_t write_domain, + bool is_render_target) +{ + uint32_t wm_surf_offset; + const struct intel_mipmap_tree *mt = surface->mt; + const uint32_t mocs_wb = is_render_target ? + (brw->gen >= 9 ? SKL_MOCS_PTE : BDW_MOCS_PTE) : + (brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB); + const uint32_t tiling = surface->map_stencil_as_y_tiled + ? I915_TILING_Y : mt->tiling; + uint32_t tile_x, tile_y; + + uint32_t *surf = gen8_allocate_surface_state(brw, &wm_surf_offset, -1); + + surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | + surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | + gen8_vertical_alignment(brw, mt, BRW_SURFACE_2D) | + gen8_horizontal_alignment(brw, mt, BRW_SURFACE_2D) | + gen8_surface_tiling_mode(tiling); + + surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; + + surf[2] = SET_FIELD(surface->width - 1, GEN7_SURFACE_WIDTH) | + SET_FIELD(surface->height - 1, GEN7_SURFACE_HEIGHT); + + uint32_t pitch_bytes = mt->pitch; + if (surface->map_stencil_as_y_tiled) + pitch_bytes *= 2; + surf[3] = pitch_bytes - 1; + + surf[4] = gen7_surface_msaa_bits(surface->num_samples, + surface->msaa_layout); + + if (surface->mt->mcs_mt) { + surf[6] = SET_FIELD(surface->mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | + SET_FIELD((surface->mt->mcs_mt->pitch / 128) - 1, + GEN8_SURFACE_AUX_PITCH) | + GEN8_SURFACE_AUX_MODE_MCS; + } else { + surf[6] = 0; + } + + gen8_emit_fast_clear_color(brw, mt, surf); + surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | + SET_FIELD(HSW_SCS_GREEN, 
GEN7_SURFACE_SCS_G) | + SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | + SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); + + /* reloc */ + *((uint64_t *)&surf[8]) = + brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) + + mt->bo->offset64; + + /* Note that the low bits of these fields are missing, so there's the + * possibility of getting in trouble. + */ + assert(tile_x % 4 == 0); + assert(tile_y % 4 == 0); + surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | + SET_FIELD(tile_y / 4, GEN8_SURFACE_Y_OFFSET); + + if (brw->gen >= 9) { + /* Disable Mip Tail by setting a large value. */ + surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); + } + + if (surface->mt->mcs_mt) { + *((uint64_t *) &surf[10]) = surface->mt->mcs_mt->bo->offset64; + drm_intel_bo_emit_reloc(brw->batch.bo, + wm_surf_offset + 10 * 4, + surface->mt->mcs_mt->bo, 0, + read_domains, write_domain); + } + + /* Emit relocation to surface contents */ + drm_intel_bo_emit_reloc(brw->batch.bo, + wm_surf_offset + 8 * 4, + mt->bo, + surf[8] - mt->bo->offset64, + read_domains, write_domain); + + return wm_surf_offset; +} + +static uint32_t +gen8_blorp_emit_blend_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t blend_state_offset; + + assume(params->num_draw_buffers); + + const unsigned size = 4 + 8 * params->num_draw_buffers; + uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE, + size, 64, + &blend_state_offset); + memset(blend, 0, size); + + for (unsigned i = 0; i < params->num_draw_buffers; ++i) { + if (params->color_write_disable[0]) + blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_RED; + if (params->color_write_disable[1]) + blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_GREEN; + if (params->color_write_disable[2]) + blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_BLUE; + if (params->color_write_disable[3]) + blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_ALPHA; + + blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE | + 
GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE | + GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT; + } + + return blend_state_offset; +} + +static void +gen8_blorp_emit_disable_constant_state(struct brw_context *brw, + unsigned opcode) +{ + BEGIN_BATCH(11); + OUT_BATCH(opcode << 16 | (11 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_disable_binding_table(struct brw_context *brw, + unsigned opcode) +{ + + BEGIN_BATCH(2); + OUT_BATCH(opcode << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_VS + * + * Disable vertex shader. + */ +static void +gen8_blorp_emit_vs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_HS + * + * Disable the hull shader. + */ +static void +gen8_blorp_emit_hs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(9); + OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_DS + * + * Disable the domain shader. + */ +static void +gen8_blorp_emit_ds_disable(struct brw_context *brw) +{ + const int ds_pkt_len = brw->gen >= 9 ? 11 : 9; + BEGIN_BATCH(ds_pkt_len); + OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); + for (int i = 0; i < ds_pkt_len - 1; i++) + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_GS + * + * Disable the geometry shader. 
+ */ +static void +gen8_blorp_emit_gs_disable(struct brw_context *brw) +{ + BEGIN_BATCH(10); + OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +/* 3DSTATE_STREAMOUT + * + * Disable streamout. + */ +static void +gen8_blorp_emit_streamout_disable(struct brw_context *brw) +{ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_raster_state(struct brw_context *brw) +{ + BEGIN_BATCH(5); + OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2)); + OUT_BATCH(GEN8_RASTER_CULL_NONE); + OUT_BATCH_F(0); + OUT_BATCH_F(0); + OUT_BATCH_F(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_sbe_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + /* 3DSTATE_SBE */ + { + const unsigned sbe_cmd_length = brw->gen == 8 ? 
4 : 6; + BEGIN_BATCH(sbe_cmd_length); + OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2)); + OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | + params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | + 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | + BRW_SF_URB_ENTRY_READ_OFFSET << + GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | + GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | + GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET); + OUT_BATCH(0); + OUT_BATCH(0); + if (sbe_cmd_length >= 6) { + OUT_BATCH(GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1)); + OUT_BATCH(0); + } + ADVANCE_BATCH(); + } + + { + BEGIN_BATCH(11); + OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2)); + + /* Output DWords 1 through 8: */ + for (int i = 0; i < 8; i++) { + OUT_BATCH(0); + } + + OUT_BATCH(0); /* wrapshortest enables 0-7 */ + OUT_BATCH(0); /* wrapshortest enables 8-15 */ + ADVANCE_BATCH(); + } +} + +static void +gen8_blorp_emit_sf_config(struct brw_context *brw) +{ + /* See gen6_blorp_emit_sf_config() */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE); + ADVANCE_BATCH(); +} + +/** + * Disable thread dispatch (dw5.19) and enable the HiZ op. + */ +static void +gen8_blorp_emit_wm_state(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); + OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0 | + GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 | + GEN7_WM_POINT_RASTRULE_UPPER_RIGHT); + ADVANCE_BATCH(); +} + +/** + * 3DSTATE_PS + * + * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite + * that, thread dispatch info must still be specified. + * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the + * valid range for this field is [0x3, 0x2f]. + * - A dispatch mode must be given; that is, at least one of the + * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was + * discovered through simulator error messages. 
+ */ +static void +gen8_blorp_emit_ps_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + uint32_t dw3, dw5, dw6, dw7; + + dw3 = dw5 = dw6 = dw7 = 0; + dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; + + dw6 |= GEN7_PS_16_DISPATCH_ENABLE; + + if (params->src.mt) { + dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ + dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */ + } else { + dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */ + } + + dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE; + dw7 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; + + /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; + * it implicitly scales for different GT levels (which have some # of PSDs). + * + * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. + */ + if (brw->gen >= 9) + dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT; + else + dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT; + + dw6 |= GEN7_PS_POSOFFSET_NONE; + dw6 |= params->fast_clear_op; + + BEGIN_BATCH(12); + OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); + OUT_BATCH(params->wm_prog_kernel); + OUT_BATCH(0); + OUT_BATCH(dw3); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(dw6); + OUT_BATCH(dw7); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_ps_blend(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2)); + OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_ps_extra(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; + uint32_t dw1 = 0; + + dw1 |= GEN8_PSX_PIXEL_SHADER_VALID; + + if (params->src.mt) { + dw1 |= GEN8_PSX_KILL_ENABLE; + dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE; + } + + if (params->dst.num_samples > 1 && prog_data && + 
prog_data->persample_msaa_dispatch) + dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); + OUT_BATCH(dw1); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_depth_disable(struct brw_context *brw) +{ + /* Skip repeated NULL depth/stencil emits (think 2D rendering). */ + if (brw->no_depth_or_stencil) + return; + + brw_emit_depth_stall_flushes(brw); + + BEGIN_BATCH(8); + OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2)); + OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(5); + OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + + BEGIN_BATCH(5); + OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_topology(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2)); + OUT_BATCH(_3DPRIM_RECTLIST); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_vf_instancing_state(struct brw_context *brw, + unsigned num_elems) +{ + for (unsigned i = 0; i < num_elems; ++i) { + BEGIN_BATCH(3); + OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); + OUT_BATCH(i); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +static void +gen8_blorp_emit_vf_state(struct brw_context *brw) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_depth_stencil_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const unsigned pkt_len = brw->gen >= 9 ? 
4 : 3; + + BEGIN_BATCH(pkt_len); + OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + if (pkt_len > 3) { + OUT_BATCH(0); + } + ADVANCE_BATCH(); +} + +static void +gen8_blorp_emit_constant_ps(struct brw_context *brw, + uint32_t wm_push_const_offset) +{ + const int dwords = brw->gen >= 8 ? 11 : 7; + BEGIN_BATCH(dwords); + OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (dwords - 2)); + + if (brw->gen >= 9) { + OUT_BATCH(0); + OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); + } else { + OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); + OUT_BATCH(0); + } + + if (brw->gen >= 9) { + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, + wm_push_const_offset); + OUT_BATCH(0); + OUT_BATCH(0); + } else { + OUT_BATCH(wm_push_const_offset); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + } + + ADVANCE_BATCH(); +} + +static uint32_t +gen8_blorp_emit_surface_states(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t wm_surf_offset_renderbuffer; + uint32_t wm_surf_offset_texture = 0; + + intel_miptree_used_for_rendering(params->dst.mt); + + wm_surf_offset_renderbuffer = + gen8_blorp_emit_surface_state(brw, &params->dst, + I915_GEM_DOMAIN_RENDER, + I915_GEM_DOMAIN_RENDER, + true /* is_render_target */); + if (params->src.mt) { + const struct brw_blorp_surface_info *surface = &params->src; + struct intel_mipmap_tree *mt = surface->mt; + + /* Textures are always sampled as 2D. */ + const bool is_cube = mt->target == GL_TEXTURE_CUBE_MAP_ARRAY || + mt->target == GL_TEXTURE_CUBE_MAP; + const unsigned depth = (is_cube ? 6 : 1) * mt->logical_depth0; + const GLenum target = is_cube ? GL_TEXTURE_2D_ARRAY : mt->target; + const unsigned max_level = surface->level + mt->last_level + 1; + const unsigned layer = mt->target != GL_TEXTURE_3D ? 
+ surface->layer / MAX2(mt->num_samples, 1) : 0; + + brw->vtbl.emit_texture_surface_state(brw, mt, target, + layer, layer + depth, + surface->level, max_level, + surface->brw_surfaceformat, + surface->swizzle, + &wm_surf_offset_texture, + -1, false, false); + } + + return gen6_blorp_emit_binding_table(brw, + wm_surf_offset_renderbuffer, + wm_surf_offset_texture); +} + +/** + * \copydoc gen6_blorp_exec() + */ +void +gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params) +{ + uint32_t wm_bind_bo_offset = 0; + + if (gen8_state_base_address.dirty.brw & brw->ctx.NewDriverState) + gen8_upload_state_base_address(brw); + + gen7_blorp_emit_cc_viewport(brw); + gen7_l3_state.emit(brw); + + gen7_blorp_emit_urb_config(brw); + + const uint32_t cc_blend_state_offset = + gen8_blorp_emit_blend_state(brw, params); + gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); + + const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw); + gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); + + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_VS); + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_HS); + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_DS); + gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_GS); + + const uint32_t wm_push_const_offset = + gen6_blorp_emit_wm_constants(brw, params); + gen8_blorp_emit_constant_ps(brw, wm_push_const_offset); + wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params); + + gen8_blorp_emit_disable_binding_table(brw, + _3DSTATE_BINDING_TABLE_POINTERS_VS); + gen8_blorp_emit_disable_binding_table(brw, + _3DSTATE_BINDING_TABLE_POINTERS_HS); + gen8_blorp_emit_disable_binding_table(brw, + _3DSTATE_BINDING_TABLE_POINTERS_DS); + gen8_blorp_emit_disable_binding_table(brw, + _3DSTATE_BINDING_TABLE_POINTERS_GS); + + gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); + + if (params->src.mt) { + const uint32_t sampler_offset = + 
gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); + gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); + } + + gen8_emit_3dstate_multisample(brw, params->dst.num_samples); + gen6_emit_3dstate_sample_mask(brw, + params->dst.num_samples > 1 ? + (1 << params->dst.num_samples) - 1 : 1); + + gen8_disable_stages.emit(brw); + gen8_blorp_emit_vs_disable(brw); + gen8_blorp_emit_hs_disable(brw); + gen7_blorp_emit_te_disable(brw); + gen8_blorp_emit_ds_disable(brw); + gen8_blorp_emit_gs_disable(brw); + + gen8_blorp_emit_streamout_disable(brw); + gen6_blorp_emit_clip_disable(brw); + gen8_blorp_emit_raster_state(brw); + gen8_blorp_emit_sbe_state(brw, params); + gen8_blorp_emit_sf_config(brw); + + gen8_blorp_emit_ps_blend(brw); + gen8_blorp_emit_ps_extra(brw, params); + + gen8_blorp_emit_ps_config(brw, params); + + gen8_blorp_emit_depth_stencil_state(brw, params); + gen8_blorp_emit_wm_state(brw); + + gen8_blorp_emit_depth_disable(brw); + gen7_blorp_emit_clear_params(brw, params); + gen6_blorp_emit_drawing_rectangle(brw, params); + gen8_blorp_emit_vf_topology(brw); + gen8_blorp_emit_vf_sys_gen_vals_state(brw); + gen6_blorp_emit_vertices(brw, params); + gen8_blorp_emit_vf_instancing_state(brw, 2); + gen8_blorp_emit_vf_state(brw); + gen7_blorp_emit_primitive(brw, params); + + if (brw->gen < 9) + gen8_write_pma_stall_bits(brw, 0); +} diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.cpp b/src/mesa/drivers/dri/i965/gen8_blorp.cpp deleted file mode 100644 index 720f5244ca2..00000000000 --- a/src/mesa/drivers/dri/i965/gen8_blorp.cpp +++ /dev/null @@ -1,715 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and 
to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "intel_batchbuffer.h" -#include "intel_fbo.h" -#include "intel_mipmap_tree.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" - -#include "brw_blorp.h" - - -/* SURFACE_STATE for renderbuffer or texture surface (see - * brw_update_renderbuffer_surface and brw_update_texture_surface) - */ -static uint32_t -gen8_blorp_emit_surface_state(struct brw_context *brw, - const struct brw_blorp_surface_info *surface, - uint32_t read_domains, uint32_t write_domain, - bool is_render_target) -{ - uint32_t wm_surf_offset; - const struct intel_mipmap_tree *mt = surface->mt; - const uint32_t mocs_wb = is_render_target ? - (brw->gen >= 9 ? SKL_MOCS_PTE : BDW_MOCS_PTE) : - (brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB); - const uint32_t tiling = surface->map_stencil_as_y_tiled - ? 
I915_TILING_Y : mt->tiling; - uint32_t tile_x, tile_y; - - uint32_t *surf = gen8_allocate_surface_state(brw, &wm_surf_offset, -1); - - surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | - surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | - gen8_vertical_alignment(brw, mt, BRW_SURFACE_2D) | - gen8_horizontal_alignment(brw, mt, BRW_SURFACE_2D) | - gen8_surface_tiling_mode(tiling); - - surf[1] = SET_FIELD(mocs_wb, GEN8_SURFACE_MOCS) | mt->qpitch >> 2; - - surf[2] = SET_FIELD(surface->width - 1, GEN7_SURFACE_WIDTH) | - SET_FIELD(surface->height - 1, GEN7_SURFACE_HEIGHT); - - uint32_t pitch_bytes = mt->pitch; - if (surface->map_stencil_as_y_tiled) - pitch_bytes *= 2; - surf[3] = pitch_bytes - 1; - - surf[4] = gen7_surface_msaa_bits(surface->num_samples, - surface->msaa_layout); - - if (surface->mt->mcs_mt) { - surf[6] = SET_FIELD(surface->mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) | - SET_FIELD((surface->mt->mcs_mt->pitch / 128) - 1, - GEN8_SURFACE_AUX_PITCH) | - GEN8_SURFACE_AUX_MODE_MCS; - } else { - surf[6] = 0; - } - - gen8_emit_fast_clear_color(brw, mt, surf); - surf[7] |= SET_FIELD(HSW_SCS_RED, GEN7_SURFACE_SCS_R) | - SET_FIELD(HSW_SCS_GREEN, GEN7_SURFACE_SCS_G) | - SET_FIELD(HSW_SCS_BLUE, GEN7_SURFACE_SCS_B) | - SET_FIELD(HSW_SCS_ALPHA, GEN7_SURFACE_SCS_A); - - /* reloc */ - *((uint64_t *)&surf[8]) = - brw_blorp_compute_tile_offsets(surface, &tile_x, &tile_y) + - mt->bo->offset64; - - /* Note that the low bits of these fields are missing, so there's the - * possibility of getting in trouble. - */ - assert(tile_x % 4 == 0); - assert(tile_y % 4 == 0); - surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 4, GEN8_SURFACE_Y_OFFSET); - - if (brw->gen >= 9) { - /* Disable Mip Tail by setting a large value. 
*/ - surf[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD); - } - - if (surface->mt->mcs_mt) { - *((uint64_t *) &surf[10]) = surface->mt->mcs_mt->bo->offset64; - drm_intel_bo_emit_reloc(brw->batch.bo, - wm_surf_offset + 10 * 4, - surface->mt->mcs_mt->bo, 0, - read_domains, write_domain); - } - - /* Emit relocation to surface contents */ - drm_intel_bo_emit_reloc(brw->batch.bo, - wm_surf_offset + 8 * 4, - mt->bo, - surf[8] - mt->bo->offset64, - read_domains, write_domain); - - return wm_surf_offset; -} - -static uint32_t -gen8_blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t blend_state_offset; - - assume(params->num_draw_buffers); - - const unsigned size = 4 + 8 * params->num_draw_buffers; - uint32_t *blend = (uint32_t *)brw_state_batch(brw, AUB_TRACE_BLEND_STATE, - size, 64, - &blend_state_offset); - memset(blend, 0, size); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - if (params->color_write_disable[0]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_RED; - if (params->color_write_disable[1]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_GREEN; - if (params->color_write_disable[2]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_BLUE; - if (params->color_write_disable[3]) - blend[1 + 2 * i] |= GEN8_BLEND_WRITE_DISABLE_ALPHA; - - blend[1 + 2 * i + 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE | - GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE | - GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT; - } - - return blend_state_offset; -} - -static void -gen8_blorp_emit_disable_constant_state(struct brw_context *brw, - unsigned opcode) -{ - BEGIN_BATCH(11); - OUT_BATCH(opcode << 16 | (11 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_disable_binding_table(struct brw_context *brw, - unsigned opcode) -{ - - BEGIN_BATCH(2); - OUT_BATCH(opcode << 
16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_VS - * - * Disable vertex shader. - */ -static void -gen8_blorp_emit_vs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_VS << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_HS - * - * Disable the hull shader. - */ -static void -gen8_blorp_emit_hs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(9); - OUT_BATCH(_3DSTATE_HS << 16 | (9 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_DS - * - * Disable the domain shader. - */ -static void -gen8_blorp_emit_ds_disable(struct brw_context *brw) -{ - const int ds_pkt_len = brw->gen >= 9 ? 11 : 9; - BEGIN_BATCH(ds_pkt_len); - OUT_BATCH(_3DSTATE_DS << 16 | (ds_pkt_len - 2)); - for (int i = 0; i < ds_pkt_len - 1; i++) - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_GS - * - * Disable the geometry shader. - */ -static void -gen8_blorp_emit_gs_disable(struct brw_context *brw) -{ - BEGIN_BATCH(10); - OUT_BATCH(_3DSTATE_GS << 16 | (10 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -/* 3DSTATE_STREAMOUT - * - * Disable streamout. 
- */ -static void -gen8_blorp_emit_streamout_disable(struct brw_context *brw) -{ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_STREAMOUT << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_raster_state(struct brw_context *brw) -{ - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_RASTER << 16 | (5 - 2)); - OUT_BATCH(GEN8_RASTER_CULL_NONE); - OUT_BATCH_F(0); - OUT_BATCH_F(0); - OUT_BATCH_F(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_sbe_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - /* 3DSTATE_SBE */ - { - const unsigned sbe_cmd_length = brw->gen == 8 ? 4 : 6; - BEGIN_BATCH(sbe_cmd_length); - OUT_BATCH(_3DSTATE_SBE << 16 | (sbe_cmd_length - 2)); - OUT_BATCH(GEN7_SBE_SWIZZLE_ENABLE | - params->num_varyings << GEN7_SBE_NUM_OUTPUTS_SHIFT | - 1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT | - BRW_SF_URB_ENTRY_READ_OFFSET << - GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT | - GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH | - GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET); - OUT_BATCH(0); - OUT_BATCH(0); - if (sbe_cmd_length >= 6) { - OUT_BATCH(GEN9_SBE_ACTIVE_COMPONENT_XYZW << (0 << 1)); - OUT_BATCH(0); - } - ADVANCE_BATCH(); - } - - { - BEGIN_BATCH(11); - OUT_BATCH(_3DSTATE_SBE_SWIZ << 16 | (11 - 2)); - - /* Output DWords 1 through 8: */ - for (int i = 0; i < 8; i++) { - OUT_BATCH(0); - } - - OUT_BATCH(0); /* wrapshortest enables 0-7 */ - OUT_BATCH(0); /* wrapshortest enables 8-15 */ - ADVANCE_BATCH(); - } -} - -static void -gen8_blorp_emit_sf_config(struct brw_context *brw) -{ - /* See gen6_blorp_emit_sf_config() */ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_SF << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE); - ADVANCE_BATCH(); -} - -/** - * Disable thread dispatch (dw5.19) and enable the HiZ op. 
- */ -static void -gen8_blorp_emit_wm_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_WM << 16 | (2 - 2)); - OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0 | - GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 | - GEN7_WM_POINT_RASTRULE_UPPER_RIGHT); - ADVANCE_BATCH(); -} - -/** - * 3DSTATE_PS - * - * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite - * that, thread dispatch info must still be specified. - * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the - * valid range for this field is [0x3, 0x2f]. - * - A dispatch mode must be given; that is, at least one of the - * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was - * discovered through simulator error messages. - */ -static void -gen8_blorp_emit_ps_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw3, dw5, dw6, dw7; - - dw3 = dw5 = dw6 = dw7 = 0; - dw3 |= GEN7_PS_VECTOR_MASK_ENABLE; - - dw6 |= GEN7_PS_16_DISPATCH_ENABLE; - - if (params->src.mt) { - dw3 |= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */ - dw3 |= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* Two surfaces */ - } else { - dw3 |= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT; /* One surface */ - } - - dw6 |= GEN7_PS_PUSH_CONSTANT_ENABLE; - dw7 |= prog_data->first_curbe_grf << GEN7_PS_DISPATCH_START_GRF_SHIFT_0; - - /* 3DSTATE_PS expects the number of threads per PSD, which is always 64; - * it implicitly scales for different GT levels (which have some # of PSDs). - * - * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1. 
- */ - if (brw->gen >= 9) - dw6 |= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT; - else - dw6 |= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT; - - dw6 |= GEN7_PS_POSOFFSET_NONE; - dw6 |= params->fast_clear_op; - - BEGIN_BATCH(12); - OUT_BATCH(_3DSTATE_PS << 16 | (12 - 2)); - OUT_BATCH(params->wm_prog_kernel); - OUT_BATCH(0); - OUT_BATCH(dw3); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(dw6); - OUT_BATCH(dw7); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_ps_blend(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_PS_BLEND << 16 | (2 - 2)); - OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_ps_extra(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const struct brw_blorp_prog_data *prog_data = params->wm_prog_data; - uint32_t dw1 = 0; - - dw1 |= GEN8_PSX_PIXEL_SHADER_VALID; - - if (params->src.mt) { - dw1 |= GEN8_PSX_KILL_ENABLE; - dw1 |= GEN8_PSX_ATTRIBUTE_ENABLE; - } - - if (params->dst.num_samples > 1 && prog_data && - prog_data->persample_msaa_dispatch) - dw1 |= GEN8_PSX_SHADER_IS_PER_SAMPLE; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_PS_EXTRA << 16 | (2 - 2)); - OUT_BATCH(dw1); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_depth_disable(struct brw_context *brw) -{ - /* Skip repeated NULL depth/stencil emits (think 2D rendering). 
*/ - if (brw->no_depth_or_stencil) - return; - - brw_emit_depth_stall_flushes(brw); - - BEGIN_BATCH(8); - OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (8 - 2)); - OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) | (BRW_SURFACE_NULL << 29)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(5); - OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - - BEGIN_BATCH(5); - OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (5 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_topology(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2)); - OUT_BATCH(_3DPRIM_RECTLIST); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_vf_instancing_state(struct brw_context *brw, - unsigned num_elems) -{ - for (unsigned i = 0; i < num_elems; ++i) { - BEGIN_BATCH(3); - OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2)); - OUT_BATCH(i); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -static void -gen8_blorp_emit_vf_state(struct brw_context *brw) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VF << 16 | (2 - 2)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned pkt_len = brw->gen >= 9 ? 4 : 3; - - BEGIN_BATCH(pkt_len); - OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL << 16 | (pkt_len - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - if (pkt_len > 3) { - OUT_BATCH(0); - } - ADVANCE_BATCH(); -} - -static void -gen8_blorp_emit_constant_ps(struct brw_context *brw, - uint32_t wm_push_const_offset) -{ - const int dwords = brw->gen >= 8 ? 
11 : 7; - BEGIN_BATCH(dwords); - OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (dwords - 2)); - - if (brw->gen >= 9) { - OUT_BATCH(0); - OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); - } else { - OUT_BATCH(BRW_BLORP_NUM_PUSH_CONST_REGS); - OUT_BATCH(0); - } - - if (brw->gen >= 9) { - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_RELOC64(brw->batch.bo, I915_GEM_DOMAIN_RENDER, 0, - wm_push_const_offset); - OUT_BATCH(0); - OUT_BATCH(0); - } else { - OUT_BATCH(wm_push_const_offset); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - } - - ADVANCE_BATCH(); -} - -static uint32_t -gen8_blorp_emit_surface_states(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t wm_surf_offset_renderbuffer; - uint32_t wm_surf_offset_texture = 0; - - intel_miptree_used_for_rendering(params->dst.mt); - - wm_surf_offset_renderbuffer = - gen8_blorp_emit_surface_state(brw, ¶ms->dst, - I915_GEM_DOMAIN_RENDER, - I915_GEM_DOMAIN_RENDER, - true /* is_render_target */); - if (params->src.mt) { - const struct brw_blorp_surface_info *surface = ¶ms->src; - struct intel_mipmap_tree *mt = surface->mt; - - /* Textures are always sampled as 2D. */ - const bool is_cube = mt->target == GL_TEXTURE_CUBE_MAP_ARRAY || - mt->target == GL_TEXTURE_CUBE_MAP; - const unsigned depth = (is_cube ? 6 : 1) * mt->logical_depth0; - const GLenum target = is_cube ? GL_TEXTURE_2D_ARRAY : mt->target; - const unsigned max_level = surface->level + mt->last_level + 1; - const unsigned layer = mt->target != GL_TEXTURE_3D ? 
- surface->layer / MAX2(mt->num_samples, 1) : 0; - - brw->vtbl.emit_texture_surface_state(brw, mt, target, - layer, layer + depth, - surface->level, max_level, - surface->brw_surfaceformat, - surface->swizzle, - &wm_surf_offset_texture, - -1, false, false); - } - - return gen6_blorp_emit_binding_table(brw, - wm_surf_offset_renderbuffer, - wm_surf_offset_texture); -} - -/** - * \copydoc gen6_blorp_exec() - */ -void -gen8_blorp_exec(struct brw_context *brw, const struct brw_blorp_params *params) -{ - uint32_t wm_bind_bo_offset = 0; - - if (gen8_state_base_address.dirty.brw & brw->ctx.NewDriverState) - gen8_upload_state_base_address(brw); - - gen7_blorp_emit_cc_viewport(brw); - gen7_l3_state.emit(brw); - - gen7_blorp_emit_urb_config(brw); - - const uint32_t cc_blend_state_offset = - gen8_blorp_emit_blend_state(brw, params); - gen7_blorp_emit_blend_state_pointer(brw, cc_blend_state_offset); - - const uint32_t cc_state_offset = gen6_blorp_emit_cc_state(brw); - gen7_blorp_emit_cc_state_pointer(brw, cc_state_offset); - - gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_VS); - gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_HS); - gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_DS); - gen8_blorp_emit_disable_constant_state(brw, _3DSTATE_CONSTANT_GS); - - const uint32_t wm_push_const_offset = - gen6_blorp_emit_wm_constants(brw, params); - gen8_blorp_emit_constant_ps(brw, wm_push_const_offset); - wm_bind_bo_offset = gen8_blorp_emit_surface_states(brw, params); - - gen8_blorp_emit_disable_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_VS); - gen8_blorp_emit_disable_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_HS); - gen8_blorp_emit_disable_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_DS); - gen8_blorp_emit_disable_binding_table(brw, - _3DSTATE_BINDING_TABLE_POINTERS_GS); - - gen7_blorp_emit_binding_table_pointers_ps(brw, wm_bind_bo_offset); - - if (params->src.mt) { - const uint32_t sampler_offset = - 
gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true); - gen7_blorp_emit_sampler_state_pointers_ps(brw, sampler_offset); - } - - gen8_emit_3dstate_multisample(brw, params->dst.num_samples); - gen6_emit_3dstate_sample_mask(brw, - params->dst.num_samples > 1 ? - (1 << params->dst.num_samples) - 1 : 1); - - gen8_disable_stages.emit(brw); - gen8_blorp_emit_vs_disable(brw); - gen8_blorp_emit_hs_disable(brw); - gen7_blorp_emit_te_disable(brw); - gen8_blorp_emit_ds_disable(brw); - gen8_blorp_emit_gs_disable(brw); - - gen8_blorp_emit_streamout_disable(brw); - gen6_blorp_emit_clip_disable(brw); - gen8_blorp_emit_raster_state(brw); - gen8_blorp_emit_sbe_state(brw, params); - gen8_blorp_emit_sf_config(brw); - - gen8_blorp_emit_ps_blend(brw); - gen8_blorp_emit_ps_extra(brw, params); - - gen8_blorp_emit_ps_config(brw, params); - - gen8_blorp_emit_depth_stencil_state(brw, params); - gen8_blorp_emit_wm_state(brw); - - gen8_blorp_emit_depth_disable(brw); - gen7_blorp_emit_clear_params(brw, params); - gen6_blorp_emit_drawing_rectangle(brw, params); - gen8_blorp_emit_vf_topology(brw); - gen8_blorp_emit_vf_sys_gen_vals_state(brw); - gen6_blorp_emit_vertices(brw, params); - gen8_blorp_emit_vf_instancing_state(brw, 2); - gen8_blorp_emit_vf_state(brw); - gen7_blorp_emit_primitive(brw, params); - - if (brw->gen < 9) - gen8_write_pma_stall_bits(brw, 0); -}