From: Jason Ekstrand Date: Sat, 6 Aug 2016 00:16:37 +0000 (-0700) Subject: i965/blorp: Move the non-static blorp state setup helpers to another file X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e4d6ffbbf68682523237faf15915a64e237317cb;p=mesa.git i965/blorp: Move the non-static blorp state setup helpers to another file We're about to start replacing blorp state setup code with packing structs and we want to feel free to delete files as we go. Signed-off-by: Jason Ekstrand Reviewed-by: Topi Pohjolainen --- diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index eea9479c8e7..735a312e9a5 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -103,6 +103,7 @@ i965_FILES = \ brw_binding_tables.c \ brw_blorp.c \ brw_blorp.h \ + brw_blorp_emit.c \ brw_cc.c \ brw_clear.c \ brw_clip.c \ diff --git a/src/mesa/drivers/dri/i965/brw_blorp_emit.c b/src/mesa/drivers/dri/i965/brw_blorp_emit.c new file mode 100644 index 00000000000..c0a7e55195c --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_blorp_emit.c @@ -0,0 +1,577 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include + +#include "intel_batchbuffer.h" +#include "intel_mipmap_tree.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" + +#include "blorp_priv.h" +#include "vbo/vbo.h" +#include "brw_draw.h" + +static void +gen6_blorp_emit_input_varying_data(struct brw_context *brw, + const struct brw_blorp_params *params, + unsigned *offset, + unsigned *size) +{ + const unsigned vec4_size_in_bytes = 4 * sizeof(float); + const unsigned max_num_varyings = + DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); + const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; + + *size = num_varyings * vec4_size_in_bytes; + + const float *const inputs_src = (const float *)¶ms->wm_inputs; + float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, + *size, 32, offset); + + /* Walk over the attribute slots, determine if the attribute is used by + * the program and when necessary copy the values from the input storage to + * the vertex data buffer. + */ + for (unsigned i = 0; i < max_num_varyings; i++) { + const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; + + if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) + continue; + + memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); + + inputs += 4; + } +} + +static void +gen6_blorp_emit_vertex_data(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t vertex_offset; + uint32_t const_data_offset = 0; + unsigned const_data_size = 0; + + /* Setup VBO for the rectangle primitive.. + * + * A rectangle primitive (3DPRIM_RECTLIST) consists of only three + * vertices. The vertices reside in screen space with DirectX coordinates + * (that is, (0, 0) is the upper left corner). + * + * v2 ------ implied + * | | + * | | + * v0 ----- v1 + * + * Since the VS is disabled, the clipper loads each VUE directly from + * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and + * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: + * dw0: Reserved, MBZ. + * dw1: Render Target Array Index. The HiZ op does not use indexed + * vertices, so set the dword to 0. + * dw2: Viewport Index. The HiZ op disables viewport mapping and + * scissoring, so set the dword to 0. + * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so + * set the dword to 0. + * dw4: Vertex Position X. + * dw5: Vertex Position Y. + * dw6: Vertex Position Z. + * dw7: Vertex Position W. + * + * dw8: Flat vertex input 0 + * dw9: Flat vertex input 1 + * ... + * dwn: Flat vertex input n - 8 + * + * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 + * "Vertex URB Entry (VUE) Formats". + * + * Only vertex position X and Y are going to be variable, Z is fixed to + * zero and W to one. Header words dw0-3 are all zero. There is no need to + * include the fixed values in the vertex buffer. Vertex fetcher can be + * instructed to fill vertex elements with constant values of one and zero + * instead of reading them from the buffer. + * Flat inputs are program constants that are not interpolated. Moreover + * their values will be the same between vertices. + * + * See the vertex element setup below. + */ + const float vertices[] = { + /* v0 */ (float)params->x0, (float)params->y1, + /* v1 */ (float)params->x1, (float)params->y1, + /* v2 */ (float)params->x0, (float)params->y0, + }; + + float *const vertex_data = (float *)brw_state_batch( + brw, AUB_TRACE_VERTEX_BUFFER, + sizeof(vertices), 32, + &vertex_offset); + memcpy(vertex_data, vertices, sizeof(vertices)); + + if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) + gen6_blorp_emit_input_varying_data(brw, params, + &const_data_offset, + &const_data_size); + + /* 3DSTATE_VERTEX_BUFFERS */ + const int num_buffers = 1 + (const_data_size > 0); + const int batch_length = 1 + 4 * num_buffers; + + BEGIN_BATCH(batch_length); + OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); + + const unsigned blorp_num_vue_elems = 2; + const unsigned stride = blorp_num_vue_elems * sizeof(float); + EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo, + vertex_offset, vertex_offset + sizeof(vertices), + stride, 0 /* steprate */); + + if (const_data_size) { + /* Tell vertex fetcher not to advance the pointer in the buffer when + * moving to the next vertex. This will effectively provide the same + * data for all the vertices. For flat inputs only the data provided + * for the first provoking vertex actually matters. + */ + const unsigned stride_zero = 0; + EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo, + const_data_offset, + const_data_offset + const_data_size, + stride_zero, 0 /* step_rate */); + } + + ADVANCE_BATCH(); +} + +void +gen6_blorp_emit_vertices(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + gen6_blorp_emit_vertex_data(brw, params); + + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned num_elements = 2 + num_varyings; + const int batch_length = 1 + 2 * num_elements; + + BEGIN_BATCH(batch_length); + + /* 3DSTATE_VERTEX_ELEMENTS + * + * Fetch dwords 0 - 7 from each VUE. See the comments above where + * the vertex_bo is filled with data. First element contains dwords + * for the VUE header, second the actual position values and the + * remaining contain the flat inputs. + */ + { + OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); + /* Element 0 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); + /* Element 1 */ + OUT_BATCH(GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT | + 0 << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); + } + + for (unsigned i = 0; i < num_varyings; ++i) { + /* Element 2 + i */ + OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT | + GEN6_VE0_VALID | + BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | + (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT); + OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | + BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); + } + + ADVANCE_BATCH(); +} + + +/* BLEND_STATE */ +uint32_t +gen6_blorp_emit_blend_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t cc_blend_state_offset; + + assume(params->num_draw_buffers); + + const unsigned size = params->num_draw_buffers * + sizeof(struct gen6_blend_state); + struct gen6_blend_state *blend = (struct gen6_blend_state *) + brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, + &cc_blend_state_offset); + + memset(blend, 0, size); + + for (unsigned i = 0; i < params->num_draw_buffers; ++i) { + blend[i].blend1.pre_blend_clamp_enable = 1; + blend[i].blend1.post_blend_clamp_enable = 1; + blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; + + blend[i].blend1.write_disable_r = params->color_write_disable[0]; + blend[i].blend1.write_disable_g = params->color_write_disable[1]; + blend[i].blend1.write_disable_b = params->color_write_disable[2]; + blend[i].blend1.write_disable_a = params->color_write_disable[3]; + } + + return cc_blend_state_offset; +} + + +/* CC_STATE */ +uint32_t +gen6_blorp_emit_cc_state(struct brw_context *brw) +{ + uint32_t cc_state_offset; + + struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *) + brw_state_batch(brw, AUB_TRACE_CC_STATE, + sizeof(gen6_color_calc_state), 64, + &cc_state_offset); + memset(cc, 0, sizeof(*cc)); + + return cc_state_offset; +} + + +/** + * \param out_offset is relative to + * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. + */ +uint32_t +gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + uint32_t depthstencil_offset; + + struct gen6_depth_stencil_state *state; + state = (struct gen6_depth_stencil_state *) + brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, + sizeof(*state), 64, + &depthstencil_offset); + memset(state, 0, sizeof(*state)); + + /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: + * - 7.5.3.1 Depth Buffer Clear + * - 7.5.3.2 Depth Buffer Resolve + * - 7.5.3.3 Hierarchical Depth Buffer Resolve + */ + state->ds2.depth_write_enable = 1; + if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { + state->ds2.depth_test_enable = 1; + state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER; + } + + return depthstencil_offset; +} + + +/* BINDING_TABLE. See brw_wm_binding_table(). */ +uint32_t +gen6_blorp_emit_binding_table(struct brw_context *brw, + uint32_t wm_surf_offset_renderbuffer, + uint32_t wm_surf_offset_texture) +{ + uint32_t wm_bind_bo_offset; + uint32_t *bind = (uint32_t *) + brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, + sizeof(uint32_t) * + BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, + 32, /* alignment */ + &wm_bind_bo_offset); + bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = + wm_surf_offset_renderbuffer; + bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture; + + return wm_bind_bo_offset; +} + + +/** + * SAMPLER_STATE. See brw_update_sampler_state(). + */ +uint32_t +gen6_blorp_emit_sampler_state(struct brw_context *brw, + unsigned tex_filter, unsigned max_lod, + bool non_normalized_coords) +{ + uint32_t sampler_offset; + uint32_t *sampler_state = (uint32_t *) + brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset); + + unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_R_MIN | + BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | + BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; + + /* XXX: I don't think that using firstLevel, lastLevel works, + * because we always setup the surface state as if firstLevel == + * level zero. Probably have to subtract firstLevel from each of + * these: + */ + brw_emit_sampler_state(brw, + sampler_state, + sampler_offset, + tex_filter, /* min filter */ + tex_filter, /* mag filter */ + BRW_MIPFILTER_NONE, + BRW_ANISORATIO_2, + address_rounding, + BRW_TEXCOORDMODE_CLAMP, + BRW_TEXCOORDMODE_CLAMP, + BRW_TEXCOORDMODE_CLAMP, + 0, /* min LOD */ + max_lod, + 0, /* LOD bias */ + 0, /* shadow function */ + non_normalized_coords, + 0); /* border color offset - unused */ + + return sampler_offset; +} + + +/* 3DSTATE_CLIP + * + * Disable the clipper. + * + * The BLORP op emits a rectangle primitive, which requires clipping to + * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 + * Section 1.3 "3D Primitives Overview": + * RECTLIST: + * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip + * Mode should be set to a value other than CLIPMODE_NORMAL. + * + * Also disable perspective divide. This doesn't change the clipper's + * output, but does spare a few electrons. + */ +void +gen6_blorp_emit_clip_disable(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_DRAWING_RECTANGLE */ +void +gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | + ((MAX2(params->y1, params->y0) - 1) << 16)); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +/* Once vertex fetcher has written full VUE entries with complete + * header the space requirement is as follows per vertex (in bytes): + * + * Header Position Program constants + * +--------+------------+-------------------+ + * | 16 | 16 | n x 16 | + * +--------+------------+-------------------+ + * + * where 'n' stands for number of varying inputs expressed as vec4s. + * + * The URB size is in turn expressed in 64 bytes (512 bits). + */ +static unsigned +gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) +{ + const unsigned num_varyings = + params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; + const unsigned total_needed = 16 + 16 + num_varyings * 16; + + return DIV_ROUND_UP(total_needed, 64); +} + +/* 3DSTATE_URB_VS + * 3DSTATE_URB_HS + * 3DSTATE_URB_DS + * 3DSTATE_URB_GS + * + * If the 3DSTATE_URB_VS is emitted, than the others must be also. + * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: + * + * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be + * programmed in order for the programming of this state to be + * valid. + */ +void +gen7_blorp_emit_urb_config(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); + + if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && + brw->urb.vsize >= vs_entry_size) + return; + + brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; + + gen7_upload_urb(brw, vs_entry_size, false, false); +} + + +/* 3DSTATE_BLEND_STATE_POINTERS */ +void +gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, + uint32_t cc_blend_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_blend_state_offset | 1); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CC_STATE_POINTERS */ +void +gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, + uint32_t cc_state_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); + OUT_BATCH(cc_state_offset | 1); + ADVANCE_BATCH(); +} + +void +gen7_blorp_emit_cc_viewport(struct brw_context *brw) +{ + struct brw_cc_viewport *ccv; + uint32_t cc_vp_offset; + + ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, + sizeof(*ccv), 32, + &cc_vp_offset); + ccv->min_depth = 0.0; + ccv->max_depth = 1.0; + + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); + OUT_BATCH(cc_vp_offset); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_TE + * + * Disable the tesselation engine. + */ +void +gen7_blorp_emit_te_disable(struct brw_context *brw) +{ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, + uint32_t wm_bind_bo_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(wm_bind_bo_offset); + ADVANCE_BATCH(); +} + + +void +gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, + uint32_t sampler_offset) +{ + BEGIN_BATCH(2); + OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); + OUT_BATCH(sampler_offset); + ADVANCE_BATCH(); +} + + +/* 3DSTATE_CLEAR_PARAMS + * + * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 + * 3DSTATE_CLEAR_PARAMS: + * 3DSTATE_CLEAR_PARAMS must always be programmed in the along + * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, + * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). + */ +void +gen7_blorp_emit_clear_params(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(3); + OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); + OUT_BATCH(params->depth.clear_color.u32[0]); + OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); + ADVANCE_BATCH(); +} + + +/* 3DPRIMITIVE */ +void +gen7_blorp_emit_primitive(struct brw_context *brw, + const struct brw_blorp_params *params) +{ + BEGIN_BATCH(7); + OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); + OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | + _3DPRIM_RECTLIST); + OUT_BATCH(3); /* vertex count per instance */ + OUT_BATCH(0); + OUT_BATCH(params->num_layers); /* instance count */ + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); +} diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.c b/src/mesa/drivers/dri/i965/gen6_blorp.c index 93ff8d1897b..d27c550be5f 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.c +++ b/src/mesa/drivers/dri/i965/gen6_blorp.c @@ -34,193 +34,6 @@ #include "vbo/vbo.h" #include "brw_draw.h" -static void -gen6_blorp_emit_input_varying_data(struct brw_context *brw, - const struct brw_blorp_params *params, - unsigned *offset, - unsigned *size) -{ - const unsigned vec4_size_in_bytes = 4 * sizeof(float); - const unsigned max_num_varyings = - DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes); - const unsigned num_varyings = params->wm_prog_data->num_varying_inputs; - - *size = num_varyings * vec4_size_in_bytes; - - const float *const inputs_src = (const float *)¶ms->wm_inputs; - float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER, - *size, 32, offset); - - /* Walk over the attribute slots, determine if the attribute is used by - * the program and when necessary copy the values from the input storage to - * the vertex data buffer. - */ - for (unsigned i = 0; i < max_num_varyings; i++) { - const gl_varying_slot attr = VARYING_SLOT_VAR0 + i; - - if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr))) - continue; - - memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes); - - inputs += 4; - } -} - -static void -gen6_blorp_emit_vertex_data(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t vertex_offset; - uint32_t const_data_offset = 0; - unsigned const_data_size = 0; - - /* Setup VBO for the rectangle primitive.. - * - * A rectangle primitive (3DPRIM_RECTLIST) consists of only three - * vertices. The vertices reside in screen space with DirectX coordinates - * (that is, (0, 0) is the upper left corner). - * - * v2 ------ implied - * | | - * | | - * v0 ----- v1 - * - * Since the VS is disabled, the clipper loads each VUE directly from - * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and - * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows: - * dw0: Reserved, MBZ. - * dw1: Render Target Array Index. The HiZ op does not use indexed - * vertices, so set the dword to 0. - * dw2: Viewport Index. The HiZ op disables viewport mapping and - * scissoring, so set the dword to 0. - * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so - * set the dword to 0. - * dw4: Vertex Position X. - * dw5: Vertex Position Y. - * dw6: Vertex Position Z. - * dw7: Vertex Position W. - * - * dw8: Flat vertex input 0 - * dw9: Flat vertex input 1 - * ... - * dwn: Flat vertex input n - 8 - * - * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1 - * "Vertex URB Entry (VUE) Formats". - * - * Only vertex position X and Y are going to be variable, Z is fixed to - * zero and W to one. Header words dw0-3 are all zero. There is no need to - * include the fixed values in the vertex buffer. Vertex fetcher can be - * instructed to fill vertex elements with constant values of one and zero - * instead of reading them from the buffer. - * Flat inputs are program constants that are not interpolated. Moreover - * their values will be the same between vertices. - * - * See the vertex element setup below. - */ - const float vertices[] = { - /* v0 */ (float)params->x0, (float)params->y1, - /* v1 */ (float)params->x1, (float)params->y1, - /* v2 */ (float)params->x0, (float)params->y0, - }; - - float *const vertex_data = (float *)brw_state_batch( - brw, AUB_TRACE_VERTEX_BUFFER, - sizeof(vertices), 32, - &vertex_offset); - memcpy(vertex_data, vertices, sizeof(vertices)); - - if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) - gen6_blorp_emit_input_varying_data(brw, params, - &const_data_offset, - &const_data_size); - - /* 3DSTATE_VERTEX_BUFFERS */ - const int num_buffers = 1 + (const_data_size > 0); - const int batch_length = 1 + 4 * num_buffers; - - BEGIN_BATCH(batch_length); - OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2)); - - const unsigned blorp_num_vue_elems = 2; - const unsigned stride = blorp_num_vue_elems * sizeof(float); - EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo, - vertex_offset, vertex_offset + sizeof(vertices), - stride, 0 /* steprate */); - - if (const_data_size) { - /* Tell vertex fetcher not to advance the pointer in the buffer when - * moving to the next vertex. This will effectively provide the same - * data for all the vertices. For flat inputs only the data provided - * for the first provoking vertex actually matters. - */ - const unsigned stride_zero = 0; - EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo, - const_data_offset, - const_data_offset + const_data_size, - stride_zero, 0 /* step_rate */); - } - - ADVANCE_BATCH(); -} - -void -gen6_blorp_emit_vertices(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - gen6_blorp_emit_vertex_data(brw, params); - - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned num_elements = 2 + num_varyings; - const int batch_length = 1 + 2 * num_elements; - - BEGIN_BATCH(batch_length); - - /* 3DSTATE_VERTEX_ELEMENTS - * - * Fetch dwords 0 - 7 from each VUE. See the comments above where - * the vertex_bo is filled with data. First element contains dwords - * for the VUE header, second the actual position values and the - * remaining contain the flat inputs. - */ - { - OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2)); - /* Element 0 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT); - /* Element 1 */ - OUT_BATCH(GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT | - 0 << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT); - } - - for (unsigned i = 0; i < num_varyings; ++i) { - /* Element 2 + i */ - OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT | - GEN6_VE0_VALID | - BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT | - (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT); - OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT | - BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT); - } - - ADVANCE_BATCH(); -} - - /* 3DSTATE_URB * * Assign the entire URB to the VS. Even though the VS disabled, URB space @@ -249,86 +62,6 @@ gen6_blorp_emit_urb_config(struct brw_context *brw, } -/* BLEND_STATE */ -uint32_t -gen6_blorp_emit_blend_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t cc_blend_state_offset; - - assume(params->num_draw_buffers); - - const unsigned size = params->num_draw_buffers * - sizeof(struct gen6_blend_state); - struct gen6_blend_state *blend = (struct gen6_blend_state *) - brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64, - &cc_blend_state_offset); - - memset(blend, 0, size); - - for (unsigned i = 0; i < params->num_draw_buffers; ++i) { - blend[i].blend1.pre_blend_clamp_enable = 1; - blend[i].blend1.post_blend_clamp_enable = 1; - blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT; - - blend[i].blend1.write_disable_r = params->color_write_disable[0]; - blend[i].blend1.write_disable_g = params->color_write_disable[1]; - blend[i].blend1.write_disable_b = params->color_write_disable[2]; - blend[i].blend1.write_disable_a = params->color_write_disable[3]; - } - - return cc_blend_state_offset; -} - - -/* CC_STATE */ -uint32_t -gen6_blorp_emit_cc_state(struct brw_context *brw) -{ - uint32_t cc_state_offset; - - struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *) - brw_state_batch(brw, AUB_TRACE_CC_STATE, - sizeof(gen6_color_calc_state), 64, - &cc_state_offset); - memset(cc, 0, sizeof(*cc)); - - return cc_state_offset; -} - - -/** - * \param out_offset is relative to - * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress. - */ -uint32_t -gen6_blorp_emit_depth_stencil_state(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - uint32_t depthstencil_offset; - - struct gen6_depth_stencil_state *state; - state = (struct gen6_depth_stencil_state *) - brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE, - sizeof(*state), 64, - &depthstencil_offset); - memset(state, 0, sizeof(*state)); - - /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2: - * - 7.5.3.1 Depth Buffer Clear - * - 7.5.3.2 Depth Buffer Resolve - * - 7.5.3.3 Hierarchical Depth Buffer Resolve - */ - state->ds2.depth_write_enable = 1; - if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) { - state->ds2.depth_test_enable = 1; - state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER; - } - - return depthstencil_offset; -} - - /* 3DSTATE_CC_STATE_POINTERS * * The pointer offsets are relative to @@ -352,73 +85,6 @@ gen6_blorp_emit_cc_state_pointers(struct brw_context *brw, } -/* BINDING_TABLE. See brw_wm_binding_table(). */ -uint32_t -gen6_blorp_emit_binding_table(struct brw_context *brw, - uint32_t wm_surf_offset_renderbuffer, - uint32_t wm_surf_offset_texture) -{ - uint32_t wm_bind_bo_offset; - uint32_t *bind = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * - BRW_BLORP_NUM_BINDING_TABLE_ENTRIES, - 32, /* alignment */ - &wm_bind_bo_offset); - bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] = - wm_surf_offset_renderbuffer; - bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture; - - return wm_bind_bo_offset; -} - - -/** - * SAMPLER_STATE. See brw_update_sampler_state(). - */ -uint32_t -gen6_blorp_emit_sampler_state(struct brw_context *brw, - unsigned tex_filter, unsigned max_lod, - bool non_normalized_coords) -{ - uint32_t sampler_offset; - uint32_t *sampler_state = (uint32_t *) - brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset); - - unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_V_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_R_MIN | - BRW_ADDRESS_ROUNDING_ENABLE_U_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_V_MAG | - BRW_ADDRESS_ROUNDING_ENABLE_R_MAG; - - /* XXX: I don't think that using firstLevel, lastLevel works, - * because we always setup the surface state as if firstLevel == - * level zero. Probably have to subtract firstLevel from each of - * these: - */ - brw_emit_sampler_state(brw, - sampler_state, - sampler_offset, - tex_filter, /* min filter */ - tex_filter, /* mag filter */ - BRW_MIPFILTER_NONE, - BRW_ANISORATIO_2, - address_rounding, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - BRW_TEXCOORDMODE_CLAMP, - 0, /* min LOD */ - max_lod, - 0, /* LOD bias */ - 0, /* shadow function */ - non_normalized_coords, - 0); /* border color offset - unused */ - - return sampler_offset; -} - - /** * 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers(). */ @@ -508,32 +174,6 @@ gen6_blorp_emit_gs_disable(struct brw_context *brw, } -/* 3DSTATE_CLIP - * - * Disable the clipper. - * - * The BLORP op emits a rectangle primitive, which requires clipping to - * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1 - * Section 1.3 "3D Primitives Overview": - * RECTLIST: - * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip - * Mode should be set to a value other than CLIPMODE_NORMAL. - * - * Also disable perspective divide. This doesn't change the clipper's - * output, but does spare a few electrons. - */ -void -gen6_blorp_emit_clip_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /* 3DSTATE_SF * * Disable ViewportTransformEnable (dw2.1) @@ -829,21 +469,6 @@ gen6_blorp_emit_clear_params(struct brw_context *brw, ADVANCE_BATCH(); } - -/* 3DSTATE_DRAWING_RECTANGLE */ -void -gen6_blorp_emit_drawing_rectangle(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) | - ((MAX2(params->y1, params->y0) - 1) << 16)); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - /* 3DSTATE_VIEWPORT_STATE_POINTERS */ static void gen6_blorp_emit_viewport_state(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.c b/src/mesa/drivers/dri/i965/gen7_blorp.c index 8fe293dc051..ce7047b7ccd 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.c +++ b/src/mesa/drivers/dri/i965/gen7_blorp.c @@ -32,97 +32,6 @@ #include "blorp_priv.h" -/* Once vertex fetcher has written full VUE entries with complete - * header the space requirement is as follows per vertex (in bytes): - * - * Header Position Program constants - * +--------+------------+-------------------+ - * | 16 | 16 | n x 16 | - * +--------+------------+-------------------+ - * - * where 'n' stands for number of varying inputs expressed as vec4s. - * - * The URB size is in turn expressed in 64 bytes (512 bits). - */ -static unsigned -gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params) -{ - const unsigned num_varyings = - params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0; - const unsigned total_needed = 16 + 16 + num_varyings * 16; - - return DIV_ROUND_UP(total_needed, 64); -} - -/* 3DSTATE_URB_VS - * 3DSTATE_URB_HS - * 3DSTATE_URB_DS - * 3DSTATE_URB_GS - * - * If the 3DSTATE_URB_VS is emitted, than the others must be also. - * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS: - * - * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be - * programmed in order for the programming of this state to be - * valid. - */ -void -gen7_blorp_emit_urb_config(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params); - - if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) && - brw->urb.vsize >= vs_entry_size) - return; - - brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE; - - gen7_upload_urb(brw, vs_entry_size, false, false); -} - - -/* 3DSTATE_BLEND_STATE_POINTERS */ -void -gen7_blorp_emit_blend_state_pointer(struct brw_context *brw, - uint32_t cc_blend_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_blend_state_offset | 1); - ADVANCE_BATCH(); -} - - -/* 3DSTATE_CC_STATE_POINTERS */ -void -gen7_blorp_emit_cc_state_pointer(struct brw_context *brw, - uint32_t cc_state_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2)); - OUT_BATCH(cc_state_offset | 1); - ADVANCE_BATCH(); -} - -void -gen7_blorp_emit_cc_viewport(struct brw_context *brw) -{ - struct brw_cc_viewport *ccv; - uint32_t cc_vp_offset; - - ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE, - sizeof(*ccv), 32, - &cc_vp_offset); - ccv->min_depth = 0.0; - ccv->max_depth = 1.0; - - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2)); - OUT_BATCH(cc_vp_offset); - ADVANCE_BATCH(); -} - /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS * @@ -195,22 +104,6 @@ gen7_blorp_emit_hs_disable(struct brw_context *brw) } -/* 3DSTATE_TE - * - * Disable the tesselation engine. - */ -void -gen7_blorp_emit_te_disable(struct brw_context *brw) -{ - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /* 3DSTATE_DS * * Disable the domain shader. @@ -460,27 +353,6 @@ gen7_blorp_emit_ps_config(struct brw_context *brw, } -void -gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw, - uint32_t wm_bind_bo_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(wm_bind_bo_offset); - ADVANCE_BATCH(); -} - - -void -gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw, - uint32_t sampler_offset) -{ - BEGIN_BATCH(2); - OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2)); - OUT_BATCH(sampler_offset); - ADVANCE_BATCH(); -} - static void gen7_blorp_emit_depth_stencil_config(struct brw_context *brw, const struct brw_blorp_params *params) @@ -580,44 +452,6 @@ gen7_blorp_emit_depth_disable(struct brw_context *brw) } -/* 3DSTATE_CLEAR_PARAMS - * - * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4 - * 3DSTATE_CLEAR_PARAMS: - * 3DSTATE_CLEAR_PARAMS must always be programmed in the along - * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER, - * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER). - */ -void -gen7_blorp_emit_clear_params(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(3); - OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2)); - OUT_BATCH(params->depth.clear_color.u32[0]); - OUT_BATCH(GEN7_DEPTH_CLEAR_VALID); - ADVANCE_BATCH(); -} - - -/* 3DPRIMITIVE */ -void -gen7_blorp_emit_primitive(struct brw_context *brw, - const struct brw_blorp_params *params) -{ - BEGIN_BATCH(7); - OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2)); - OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL | - _3DPRIM_RECTLIST); - OUT_BATCH(3); /* vertex count per instance */ - OUT_BATCH(0); - OUT_BATCH(params->num_layers); /* instance count */ - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); -} - - /** * \copydoc gen6_blorp_exec() */