--- /dev/null
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_defines.h"
+#include "brw_state.h"
+
+#include "blorp_priv.h"
+#include "vbo/vbo.h"
+#include "brw_draw.h"
+
+static void
+gen6_blorp_emit_input_varying_data(struct brw_context *brw,
+ const struct brw_blorp_params *params,
+ unsigned *offset,
+ unsigned *size)
+{
+ const unsigned vec4_size_in_bytes = 4 * sizeof(float);
+ const unsigned max_num_varyings =
+ DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
+ const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
+
+ *size = num_varyings * vec4_size_in_bytes;
+
+ const float *const inputs_src = (const float *)¶ms->wm_inputs;
+ float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
+ *size, 32, offset);
+
+ /* Walk over the attribute slots, determine if the attribute is used by
+ * the program and when necessary copy the values from the input storage to
+ * the vertex data buffer.
+ */
+ for (unsigned i = 0; i < max_num_varyings; i++) {
+ const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
+
+ if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
+ continue;
+
+ memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
+
+ inputs += 4;
+ }
+}
+
+static void
+gen6_blorp_emit_vertex_data(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ uint32_t vertex_offset;
+ uint32_t const_data_offset = 0;
+ unsigned const_data_size = 0;
+
+ /* Setup VBO for the rectangle primitive..
+ *
+ * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
+ * vertices. The vertices reside in screen space with DirectX coordinates
+ * (that is, (0, 0) is the upper left corner).
+ *
+ * v2 ------ implied
+ * | |
+ * | |
+ * v0 ----- v1
+ *
+ * Since the VS is disabled, the clipper loads each VUE directly from
+ * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
+ * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
+ * dw0: Reserved, MBZ.
+ * dw1: Render Target Array Index. The HiZ op does not use indexed
+ * vertices, so set the dword to 0.
+ * dw2: Viewport Index. The HiZ op disables viewport mapping and
+ * scissoring, so set the dword to 0.
+ * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
+ * set the dword to 0.
+ * dw4: Vertex Position X.
+ * dw5: Vertex Position Y.
+ * dw6: Vertex Position Z.
+ * dw7: Vertex Position W.
+ *
+ * dw8: Flat vertex input 0
+ * dw9: Flat vertex input 1
+ * ...
+ * dwn: Flat vertex input n - 8
+ *
+ * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
+ * "Vertex URB Entry (VUE) Formats".
+ *
+ * Only vertex position X and Y are going to be variable, Z is fixed to
+ * zero and W to one. Header words dw0-3 are all zero. There is no need to
+ * include the fixed values in the vertex buffer. Vertex fetcher can be
+ * instructed to fill vertex elements with constant values of one and zero
+ * instead of reading them from the buffer.
+ * Flat inputs are program constants that are not interpolated. Moreover
+ * their values will be the same between vertices.
+ *
+ * See the vertex element setup below.
+ */
+ const float vertices[] = {
+ /* v0 */ (float)params->x0, (float)params->y1,
+ /* v1 */ (float)params->x1, (float)params->y1,
+ /* v2 */ (float)params->x0, (float)params->y0,
+ };
+
+ float *const vertex_data = (float *)brw_state_batch(
+ brw, AUB_TRACE_VERTEX_BUFFER,
+ sizeof(vertices), 32,
+ &vertex_offset);
+ memcpy(vertex_data, vertices, sizeof(vertices));
+
+ if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs)
+ gen6_blorp_emit_input_varying_data(brw, params,
+ &const_data_offset,
+ &const_data_size);
+
+ /* 3DSTATE_VERTEX_BUFFERS */
+ const int num_buffers = 1 + (const_data_size > 0);
+ const int batch_length = 1 + 4 * num_buffers;
+
+ BEGIN_BATCH(batch_length);
+ OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
+
+ const unsigned blorp_num_vue_elems = 2;
+ const unsigned stride = blorp_num_vue_elems * sizeof(float);
+ EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo,
+ vertex_offset, vertex_offset + sizeof(vertices),
+ stride, 0 /* steprate */);
+
+ if (const_data_size) {
+ /* Tell vertex fetcher not to advance the pointer in the buffer when
+ * moving to the next vertex. This will effectively provide the same
+ * data for all the vertices. For flat inputs only the data provided
+ * for the first provoking vertex actually matters.
+ */
+ const unsigned stride_zero = 0;
+ EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo,
+ const_data_offset,
+ const_data_offset + const_data_size,
+ stride_zero, 0 /* step_rate */);
+ }
+
+ ADVANCE_BATCH();
+}
+
+void
+gen6_blorp_emit_vertices(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ gen6_blorp_emit_vertex_data(brw, params);
+
+ const unsigned num_varyings =
+ params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
+ const unsigned num_elements = 2 + num_varyings;
+ const int batch_length = 1 + 2 * num_elements;
+
+ BEGIN_BATCH(batch_length);
+
+ /* 3DSTATE_VERTEX_ELEMENTS
+ *
+ * Fetch dwords 0 - 7 from each VUE. See the comments above where
+ * the vertex_bo is filled with data. First element contains dwords
+ * for the VUE header, second the actual position values and the
+ * remaining contain the flat inputs.
+ */
+ {
+ OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
+ /* Element 0 */
+ OUT_BATCH(GEN6_VE0_VALID |
+ BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+ 0 << BRW_VE0_SRC_OFFSET_SHIFT);
+ OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
+ BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
+ BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+ BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
+ /* Element 1 */
+ OUT_BATCH(GEN6_VE0_VALID |
+ BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+ 0 << BRW_VE0_SRC_OFFSET_SHIFT);
+ OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+ BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
+ BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
+ }
+
+ for (unsigned i = 0; i < num_varyings; ++i) {
+ /* Element 2 + i */
+ OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT |
+ GEN6_VE0_VALID |
+ BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
+ (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT);
+ OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
+ BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
+ }
+
+ ADVANCE_BATCH();
+}
+
+
+/* BLEND_STATE */
+uint32_t
+gen6_blorp_emit_blend_state(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ uint32_t cc_blend_state_offset;
+
+ assume(params->num_draw_buffers);
+
+ const unsigned size = params->num_draw_buffers *
+ sizeof(struct gen6_blend_state);
+ struct gen6_blend_state *blend = (struct gen6_blend_state *)
+ brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64,
+ &cc_blend_state_offset);
+
+ memset(blend, 0, size);
+
+ for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
+ blend[i].blend1.pre_blend_clamp_enable = 1;
+ blend[i].blend1.post_blend_clamp_enable = 1;
+ blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
+
+ blend[i].blend1.write_disable_r = params->color_write_disable[0];
+ blend[i].blend1.write_disable_g = params->color_write_disable[1];
+ blend[i].blend1.write_disable_b = params->color_write_disable[2];
+ blend[i].blend1.write_disable_a = params->color_write_disable[3];
+ }
+
+ return cc_blend_state_offset;
+}
+
+
+/* CC_STATE */
+uint32_t
+gen6_blorp_emit_cc_state(struct brw_context *brw)
+{
+ uint32_t cc_state_offset;
+
+ struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *)
+ brw_state_batch(brw, AUB_TRACE_CC_STATE,
+ sizeof(gen6_color_calc_state), 64,
+ &cc_state_offset);
+ memset(cc, 0, sizeof(*cc));
+
+ return cc_state_offset;
+}
+
+
+/**
+ * \param out_offset is relative to
+ * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
+ */
+uint32_t
+gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ uint32_t depthstencil_offset;
+
+ struct gen6_depth_stencil_state *state;
+ state = (struct gen6_depth_stencil_state *)
+ brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
+ sizeof(*state), 64,
+ &depthstencil_offset);
+ memset(state, 0, sizeof(*state));
+
+ /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
+ * - 7.5.3.1 Depth Buffer Clear
+ * - 7.5.3.2 Depth Buffer Resolve
+ * - 7.5.3.3 Hierarchical Depth Buffer Resolve
+ */
+ state->ds2.depth_write_enable = 1;
+ if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
+ state->ds2.depth_test_enable = 1;
+ state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER;
+ }
+
+ return depthstencil_offset;
+}
+
+
+/* BINDING_TABLE. See brw_wm_binding_table(). */
+uint32_t
+gen6_blorp_emit_binding_table(struct brw_context *brw,
+ uint32_t wm_surf_offset_renderbuffer,
+ uint32_t wm_surf_offset_texture)
+{
+ uint32_t wm_bind_bo_offset;
+ uint32_t *bind = (uint32_t *)
+ brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+ sizeof(uint32_t) *
+ BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
+ 32, /* alignment */
+ &wm_bind_bo_offset);
+ bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
+ wm_surf_offset_renderbuffer;
+ bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture;
+
+ return wm_bind_bo_offset;
+}
+
+
+/**
+ * SAMPLER_STATE. See brw_update_sampler_state().
+ */
+uint32_t
+gen6_blorp_emit_sampler_state(struct brw_context *brw,
+ unsigned tex_filter, unsigned max_lod,
+ bool non_normalized_coords)
+{
+ uint32_t sampler_offset;
+ uint32_t *sampler_state = (uint32_t *)
+ brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset);
+
+ unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
+
+ /* XXX: I don't think that using firstLevel, lastLevel works,
+ * because we always setup the surface state as if firstLevel ==
+ * level zero. Probably have to subtract firstLevel from each of
+ * these:
+ */
+ brw_emit_sampler_state(brw,
+ sampler_state,
+ sampler_offset,
+ tex_filter, /* min filter */
+ tex_filter, /* mag filter */
+ BRW_MIPFILTER_NONE,
+ BRW_ANISORATIO_2,
+ address_rounding,
+ BRW_TEXCOORDMODE_CLAMP,
+ BRW_TEXCOORDMODE_CLAMP,
+ BRW_TEXCOORDMODE_CLAMP,
+ 0, /* min LOD */
+ max_lod,
+ 0, /* LOD bias */
+ 0, /* shadow function */
+ non_normalized_coords,
+ 0); /* border color offset - unused */
+
+ return sampler_offset;
+}
+
+
+/* 3DSTATE_CLIP
+ *
+ * Disable the clipper.
+ *
+ * The BLORP op emits a rectangle primitive, which requires clipping to
+ * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
+ * Section 1.3 "3D Primitives Overview":
+ * RECTLIST:
+ * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
+ * Mode should be set to a value other than CLIPMODE_NORMAL.
+ *
+ * Also disable perspective divide. This doesn't change the clipper's
+ * output, but does spare a few electrons.
+ */
+void
+gen6_blorp_emit_clip_disable(struct brw_context *brw)
+{
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_DRAWING_RECTANGLE */
+void
+gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) |
+ ((MAX2(params->y1, params->y0) - 1) << 16));
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+
+/* Once vertex fetcher has written full VUE entries with complete
+ * header the space requirement is as follows per vertex (in bytes):
+ *
+ * Header Position Program constants
+ * +--------+------------+-------------------+
+ * | 16 | 16 | n x 16 |
+ * +--------+------------+-------------------+
+ *
+ * where 'n' stands for number of varying inputs expressed as vec4s.
+ *
+ * The URB size is in turn expressed in 64 bytes (512 bits).
+ */
+static unsigned
+gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
+{
+ const unsigned num_varyings =
+ params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
+ const unsigned total_needed = 16 + 16 + num_varyings * 16;
+
+ return DIV_ROUND_UP(total_needed, 64);
+}
+
+/* 3DSTATE_URB_VS
+ * 3DSTATE_URB_HS
+ * 3DSTATE_URB_DS
+ * 3DSTATE_URB_GS
+ *
+ * If the 3DSTATE_URB_VS is emitted, than the others must be also.
+ * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
+ *
+ * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
+ * programmed in order for the programming of this state to be
+ * valid.
+ */
+void
+gen7_blorp_emit_urb_config(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);
+
+ if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
+ brw->urb.vsize >= vs_entry_size)
+ return;
+
+ brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
+
+ gen7_upload_urb(brw, vs_entry_size, false, false);
+}
+
+
+/* 3DSTATE_BLEND_STATE_POINTERS */
+void
+gen7_blorp_emit_blend_state_pointer(struct brw_context *brw,
+ uint32_t cc_blend_state_offset)
+{
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(cc_blend_state_offset | 1);
+ ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_CC_STATE_POINTERS */
+void
+gen7_blorp_emit_cc_state_pointer(struct brw_context *brw,
+ uint32_t cc_state_offset)
+{
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
+ OUT_BATCH(cc_state_offset | 1);
+ ADVANCE_BATCH();
+}
+
+void
+gen7_blorp_emit_cc_viewport(struct brw_context *brw)
+{
+ struct brw_cc_viewport *ccv;
+ uint32_t cc_vp_offset;
+
+ ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
+ sizeof(*ccv), 32,
+ &cc_vp_offset);
+ ccv->min_depth = 0.0;
+ ccv->max_depth = 1.0;
+
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
+ OUT_BATCH(cc_vp_offset);
+ ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_TE
+ *
+ * Disable the tesselation engine.
+ */
+void
+gen7_blorp_emit_te_disable(struct brw_context *brw)
+{
+ BEGIN_BATCH(4);
+ OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
+
+
+void
+gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw,
+ uint32_t wm_bind_bo_offset)
+{
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
+ OUT_BATCH(wm_bind_bo_offset);
+ ADVANCE_BATCH();
+}
+
+
+void
+gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw,
+ uint32_t sampler_offset)
+{
+ BEGIN_BATCH(2);
+ OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
+ OUT_BATCH(sampler_offset);
+ ADVANCE_BATCH();
+}
+
+
+/* 3DSTATE_CLEAR_PARAMS
+ *
+ * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
+ * 3DSTATE_CLEAR_PARAMS:
+ * 3DSTATE_CLEAR_PARAMS must always be programmed in the along
+ * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
+ * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
+ */
+void
+gen7_blorp_emit_clear_params(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
+ OUT_BATCH(params->depth.clear_color.u32[0]);
+ OUT_BATCH(GEN7_DEPTH_CLEAR_VALID);
+ ADVANCE_BATCH();
+}
+
+
+/* 3DPRIMITIVE */
+void
+gen7_blorp_emit_primitive(struct brw_context *brw,
+ const struct brw_blorp_params *params)
+{
+ BEGIN_BATCH(7);
+ OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
+ OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
+ _3DPRIM_RECTLIST);
+ OUT_BATCH(3); /* vertex count per instance */
+ OUT_BATCH(0);
+ OUT_BATCH(params->num_layers); /* instance count */
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+}
#include "vbo/vbo.h"
#include "brw_draw.h"
-static void
-gen6_blorp_emit_input_varying_data(struct brw_context *brw,
- const struct brw_blorp_params *params,
- unsigned *offset,
- unsigned *size)
-{
- const unsigned vec4_size_in_bytes = 4 * sizeof(float);
- const unsigned max_num_varyings =
- DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
- const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
-
- *size = num_varyings * vec4_size_in_bytes;
-
- const float *const inputs_src = (const float *)¶ms->wm_inputs;
- float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
- *size, 32, offset);
-
- /* Walk over the attribute slots, determine if the attribute is used by
- * the program and when necessary copy the values from the input storage to
- * the vertex data buffer.
- */
- for (unsigned i = 0; i < max_num_varyings; i++) {
- const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
-
- if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
- continue;
-
- memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
-
- inputs += 4;
- }
-}
-
-static void
-gen6_blorp_emit_vertex_data(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- uint32_t vertex_offset;
- uint32_t const_data_offset = 0;
- unsigned const_data_size = 0;
-
- /* Setup VBO for the rectangle primitive..
- *
- * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
- * vertices. The vertices reside in screen space with DirectX coordinates
- * (that is, (0, 0) is the upper left corner).
- *
- * v2 ------ implied
- * | |
- * | |
- * v0 ----- v1
- *
- * Since the VS is disabled, the clipper loads each VUE directly from
- * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
- * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
- * dw0: Reserved, MBZ.
- * dw1: Render Target Array Index. The HiZ op does not use indexed
- * vertices, so set the dword to 0.
- * dw2: Viewport Index. The HiZ op disables viewport mapping and
- * scissoring, so set the dword to 0.
- * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
- * set the dword to 0.
- * dw4: Vertex Position X.
- * dw5: Vertex Position Y.
- * dw6: Vertex Position Z.
- * dw7: Vertex Position W.
- *
- * dw8: Flat vertex input 0
- * dw9: Flat vertex input 1
- * ...
- * dwn: Flat vertex input n - 8
- *
- * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
- * "Vertex URB Entry (VUE) Formats".
- *
- * Only vertex position X and Y are going to be variable, Z is fixed to
- * zero and W to one. Header words dw0-3 are all zero. There is no need to
- * include the fixed values in the vertex buffer. Vertex fetcher can be
- * instructed to fill vertex elements with constant values of one and zero
- * instead of reading them from the buffer.
- * Flat inputs are program constants that are not interpolated. Moreover
- * their values will be the same between vertices.
- *
- * See the vertex element setup below.
- */
- const float vertices[] = {
- /* v0 */ (float)params->x0, (float)params->y1,
- /* v1 */ (float)params->x1, (float)params->y1,
- /* v2 */ (float)params->x0, (float)params->y0,
- };
-
- float *const vertex_data = (float *)brw_state_batch(
- brw, AUB_TRACE_VERTEX_BUFFER,
- sizeof(vertices), 32,
- &vertex_offset);
- memcpy(vertex_data, vertices, sizeof(vertices));
-
- if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs)
- gen6_blorp_emit_input_varying_data(brw, params,
- &const_data_offset,
- &const_data_size);
-
- /* 3DSTATE_VERTEX_BUFFERS */
- const int num_buffers = 1 + (const_data_size > 0);
- const int batch_length = 1 + 4 * num_buffers;
-
- BEGIN_BATCH(batch_length);
- OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
-
- const unsigned blorp_num_vue_elems = 2;
- const unsigned stride = blorp_num_vue_elems * sizeof(float);
- EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo,
- vertex_offset, vertex_offset + sizeof(vertices),
- stride, 0 /* steprate */);
-
- if (const_data_size) {
- /* Tell vertex fetcher not to advance the pointer in the buffer when
- * moving to the next vertex. This will effectively provide the same
- * data for all the vertices. For flat inputs only the data provided
- * for the first provoking vertex actually matters.
- */
- const unsigned stride_zero = 0;
- EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo,
- const_data_offset,
- const_data_offset + const_data_size,
- stride_zero, 0 /* step_rate */);
- }
-
- ADVANCE_BATCH();
-}
-
-void
-gen6_blorp_emit_vertices(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- gen6_blorp_emit_vertex_data(brw, params);
-
- const unsigned num_varyings =
- params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
- const unsigned num_elements = 2 + num_varyings;
- const int batch_length = 1 + 2 * num_elements;
-
- BEGIN_BATCH(batch_length);
-
- /* 3DSTATE_VERTEX_ELEMENTS
- *
- * Fetch dwords 0 - 7 from each VUE. See the comments above where
- * the vertex_bo is filled with data. First element contains dwords
- * for the VUE header, second the actual position values and the
- * remaining contain the flat inputs.
- */
- {
- OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
- /* Element 0 */
- OUT_BATCH(GEN6_VE0_VALID |
- BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
- 0 << BRW_VE0_SRC_OFFSET_SHIFT);
- OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
- BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
- BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
- BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
- /* Element 1 */
- OUT_BATCH(GEN6_VE0_VALID |
- BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT |
- 0 << BRW_VE0_SRC_OFFSET_SHIFT);
- OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
- BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
- BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
- BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
- }
-
- for (unsigned i = 0; i < num_varyings; ++i) {
- /* Element 2 + i */
- OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT |
- GEN6_VE0_VALID |
- BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
- (i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT);
- OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
- BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
- BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
- BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
- }
-
- ADVANCE_BATCH();
-}
-
-
/* 3DSTATE_URB
*
* Assign the entire URB to the VS. Even though the VS disabled, URB space
}
-/* BLEND_STATE */
-uint32_t
-gen6_blorp_emit_blend_state(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- uint32_t cc_blend_state_offset;
-
- assume(params->num_draw_buffers);
-
- const unsigned size = params->num_draw_buffers *
- sizeof(struct gen6_blend_state);
- struct gen6_blend_state *blend = (struct gen6_blend_state *)
- brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64,
- &cc_blend_state_offset);
-
- memset(blend, 0, size);
-
- for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
- blend[i].blend1.pre_blend_clamp_enable = 1;
- blend[i].blend1.post_blend_clamp_enable = 1;
- blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
-
- blend[i].blend1.write_disable_r = params->color_write_disable[0];
- blend[i].blend1.write_disable_g = params->color_write_disable[1];
- blend[i].blend1.write_disable_b = params->color_write_disable[2];
- blend[i].blend1.write_disable_a = params->color_write_disable[3];
- }
-
- return cc_blend_state_offset;
-}
-
-
-/* CC_STATE */
-uint32_t
-gen6_blorp_emit_cc_state(struct brw_context *brw)
-{
- uint32_t cc_state_offset;
-
- struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *)
- brw_state_batch(brw, AUB_TRACE_CC_STATE,
- sizeof(gen6_color_calc_state), 64,
- &cc_state_offset);
- memset(cc, 0, sizeof(*cc));
-
- return cc_state_offset;
-}
-
-
-/**
- * \param out_offset is relative to
- * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
- */
-uint32_t
-gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- uint32_t depthstencil_offset;
-
- struct gen6_depth_stencil_state *state;
- state = (struct gen6_depth_stencil_state *)
- brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
- sizeof(*state), 64,
- &depthstencil_offset);
- memset(state, 0, sizeof(*state));
-
- /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
- * - 7.5.3.1 Depth Buffer Clear
- * - 7.5.3.2 Depth Buffer Resolve
- * - 7.5.3.3 Hierarchical Depth Buffer Resolve
- */
- state->ds2.depth_write_enable = 1;
- if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
- state->ds2.depth_test_enable = 1;
- state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER;
- }
-
- return depthstencil_offset;
-}
-
-
/* 3DSTATE_CC_STATE_POINTERS
*
* The pointer offsets are relative to
}
-/* BINDING_TABLE. See brw_wm_binding_table(). */
-uint32_t
-gen6_blorp_emit_binding_table(struct brw_context *brw,
- uint32_t wm_surf_offset_renderbuffer,
- uint32_t wm_surf_offset_texture)
-{
- uint32_t wm_bind_bo_offset;
- uint32_t *bind = (uint32_t *)
- brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
- sizeof(uint32_t) *
- BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
- 32, /* alignment */
- &wm_bind_bo_offset);
- bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
- wm_surf_offset_renderbuffer;
- bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture;
-
- return wm_bind_bo_offset;
-}
-
-
-/**
- * SAMPLER_STATE. See brw_update_sampler_state().
- */
-uint32_t
-gen6_blorp_emit_sampler_state(struct brw_context *brw,
- unsigned tex_filter, unsigned max_lod,
- bool non_normalized_coords)
-{
- uint32_t sampler_offset;
- uint32_t *sampler_state = (uint32_t *)
- brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset);
-
- unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
- BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
- BRW_ADDRESS_ROUNDING_ENABLE_R_MIN |
- BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
- BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
- BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
-
- /* XXX: I don't think that using firstLevel, lastLevel works,
- * because we always setup the surface state as if firstLevel ==
- * level zero. Probably have to subtract firstLevel from each of
- * these:
- */
- brw_emit_sampler_state(brw,
- sampler_state,
- sampler_offset,
- tex_filter, /* min filter */
- tex_filter, /* mag filter */
- BRW_MIPFILTER_NONE,
- BRW_ANISORATIO_2,
- address_rounding,
- BRW_TEXCOORDMODE_CLAMP,
- BRW_TEXCOORDMODE_CLAMP,
- BRW_TEXCOORDMODE_CLAMP,
- 0, /* min LOD */
- max_lod,
- 0, /* LOD bias */
- 0, /* shadow function */
- non_normalized_coords,
- 0); /* border color offset - unused */
-
- return sampler_offset;
-}
-
-
/**
* 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers().
*/
}
-/* 3DSTATE_CLIP
- *
- * Disable the clipper.
- *
- * The BLORP op emits a rectangle primitive, which requires clipping to
- * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
- * Section 1.3 "3D Primitives Overview":
- * RECTLIST:
- * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
- * Mode should be set to a value other than CLIPMODE_NORMAL.
- *
- * Also disable perspective divide. This doesn't change the clipper's
- * output, but does spare a few electrons.
- */
-void
-gen6_blorp_emit_clip_disable(struct brw_context *brw)
-{
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-}
-
-
/* 3DSTATE_SF
*
* Disable ViewportTransformEnable (dw2.1)
ADVANCE_BATCH();
}
-
-/* 3DSTATE_DRAWING_RECTANGLE */
-void
-gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) |
- ((MAX2(params->y1, params->y0) - 1) << 16));
- OUT_BATCH(0);
- ADVANCE_BATCH();
-}
-
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
static void
gen6_blorp_emit_viewport_state(struct brw_context *brw,
#include "blorp_priv.h"
-/* Once vertex fetcher has written full VUE entries with complete
- * header the space requirement is as follows per vertex (in bytes):
- *
- * Header Position Program constants
- * +--------+------------+-------------------+
- * | 16 | 16 | n x 16 |
- * +--------+------------+-------------------+
- *
- * where 'n' stands for number of varying inputs expressed as vec4s.
- *
- * The URB size is in turn expressed in 64 bytes (512 bits).
- */
-static unsigned
-gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
-{
- const unsigned num_varyings =
- params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
- const unsigned total_needed = 16 + 16 + num_varyings * 16;
-
- return DIV_ROUND_UP(total_needed, 64);
-}
-
-/* 3DSTATE_URB_VS
- * 3DSTATE_URB_HS
- * 3DSTATE_URB_DS
- * 3DSTATE_URB_GS
- *
- * If the 3DSTATE_URB_VS is emitted, than the others must be also.
- * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
- *
- * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
- * programmed in order for the programming of this state to be
- * valid.
- */
-void
-gen7_blorp_emit_urb_config(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);
-
- if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
- brw->urb.vsize >= vs_entry_size)
- return;
-
- brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
-
- gen7_upload_urb(brw, vs_entry_size, false, false);
-}
-
-
-/* 3DSTATE_BLEND_STATE_POINTERS */
-void
-gen7_blorp_emit_blend_state_pointer(struct brw_context *brw,
- uint32_t cc_blend_state_offset)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(cc_blend_state_offset | 1);
- ADVANCE_BATCH();
-}
-
-
-/* 3DSTATE_CC_STATE_POINTERS */
-void
-gen7_blorp_emit_cc_state_pointer(struct brw_context *brw,
- uint32_t cc_state_offset)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (2 - 2));
- OUT_BATCH(cc_state_offset | 1);
- ADVANCE_BATCH();
-}
-
-void
-gen7_blorp_emit_cc_viewport(struct brw_context *brw)
-{
- struct brw_cc_viewport *ccv;
- uint32_t cc_vp_offset;
-
- ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
- sizeof(*ccv), 32,
- &cc_vp_offset);
- ccv->min_depth = 0.0;
- ccv->max_depth = 1.0;
-
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC << 16 | (2 - 2));
- OUT_BATCH(cc_vp_offset);
- ADVANCE_BATCH();
-}
-
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
*
}
-/* 3DSTATE_TE
- *
- * Disable the tesselation engine.
- */
-void
-gen7_blorp_emit_te_disable(struct brw_context *brw)
-{
- BEGIN_BATCH(4);
- OUT_BATCH(_3DSTATE_TE << 16 | (4 - 2));
- OUT_BATCH(0);
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-}
-
-
/* 3DSTATE_DS
*
* Disable the domain shader.
}
-void
-gen7_blorp_emit_binding_table_pointers_ps(struct brw_context *brw,
- uint32_t wm_bind_bo_offset)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
- OUT_BATCH(wm_bind_bo_offset);
- ADVANCE_BATCH();
-}
-
-
-void
-gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context *brw,
- uint32_t sampler_offset)
-{
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
- OUT_BATCH(sampler_offset);
- ADVANCE_BATCH();
-}
-
static void
gen7_blorp_emit_depth_stencil_config(struct brw_context *brw,
const struct brw_blorp_params *params)
}
-/* 3DSTATE_CLEAR_PARAMS
- *
- * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
- * 3DSTATE_CLEAR_PARAMS:
- * 3DSTATE_CLEAR_PARAMS must always be programmed in the along
- * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
- * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
- */
-void
-gen7_blorp_emit_clear_params(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- BEGIN_BATCH(3);
- OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
- OUT_BATCH(params->depth.clear_color.u32[0]);
- OUT_BATCH(GEN7_DEPTH_CLEAR_VALID);
- ADVANCE_BATCH();
-}
-
-
-/* 3DPRIMITIVE */
-void
-gen7_blorp_emit_primitive(struct brw_context *brw,
- const struct brw_blorp_params *params)
-{
- BEGIN_BATCH(7);
- OUT_BATCH(CMD_3D_PRIM << 16 | (7 - 2));
- OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL |
- _3DPRIM_RECTLIST);
- OUT_BATCH(3); /* vertex count per instance */
- OUT_BATCH(0);
- OUT_BATCH(params->num_layers); /* instance count */
- OUT_BATCH(0);
- OUT_BATCH(0);
- ADVANCE_BATCH();
-}
-
-
/**
* \copydoc gen6_blorp_exec()
*/