/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
29 #include "brw_context.h"
30 #include "brw_defines.h"
31 #include "brw_state.h"
33 #include "blorp_priv.h"
38 gen6_blorp_emit_input_varying_data(struct brw_context
*brw
,
39 const struct brw_blorp_params
*params
,
43 const unsigned vec4_size_in_bytes
= 4 * sizeof(float);
44 const unsigned max_num_varyings
=
45 DIV_ROUND_UP(sizeof(params
->wm_inputs
), vec4_size_in_bytes
);
46 const unsigned num_varyings
= params
->wm_prog_data
->num_varying_inputs
;
48 *size
= num_varyings
* vec4_size_in_bytes
;
50 const float *const inputs_src
= (const float *)¶ms
->wm_inputs
;
51 float *inputs
= (float *)brw_state_batch(brw
, AUB_TRACE_VERTEX_BUFFER
,
54 /* Walk over the attribute slots, determine if the attribute is used by
55 * the program and when necessary copy the values from the input storage to
56 * the vertex data buffer.
58 for (unsigned i
= 0; i
< max_num_varyings
; i
++) {
59 const gl_varying_slot attr
= VARYING_SLOT_VAR0
+ i
;
61 if (!(params
->wm_prog_data
->inputs_read
& BITFIELD64_BIT(attr
)))
64 memcpy(inputs
, inputs_src
+ i
* 4, vec4_size_in_bytes
);
71 gen6_blorp_emit_vertex_data(struct brw_context
*brw
,
72 const struct brw_blorp_params
*params
)
74 uint32_t vertex_offset
;
75 uint32_t const_data_offset
= 0;
76 unsigned const_data_size
= 0;
78 /* Setup VBO for the rectangle primitive..
80 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
81 * vertices. The vertices reside in screen space with DirectX coordinates
82 * (that is, (0, 0) is the upper left corner).
89 * Since the VS is disabled, the clipper loads each VUE directly from
90 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
91 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
93 * dw1: Render Target Array Index. The HiZ op does not use indexed
94 * vertices, so set the dword to 0.
95 * dw2: Viewport Index. The HiZ op disables viewport mapping and
96 * scissoring, so set the dword to 0.
97 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
99 * dw4: Vertex Position X.
100 * dw5: Vertex Position Y.
101 * dw6: Vertex Position Z.
102 * dw7: Vertex Position W.
104 * dw8: Flat vertex input 0
105 * dw9: Flat vertex input 1
107 * dwn: Flat vertex input n - 8
109 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
110 * "Vertex URB Entry (VUE) Formats".
112 * Only vertex position X and Y are going to be variable, Z is fixed to
113 * zero and W to one. Header words dw0-3 are all zero. There is no need to
114 * include the fixed values in the vertex buffer. Vertex fetcher can be
115 * instructed to fill vertex elements with constant values of one and zero
116 * instead of reading them from the buffer.
117 * Flat inputs are program constants that are not interpolated. Moreover
118 * their values will be the same between vertices.
120 * See the vertex element setup below.
122 const float vertices
[] = {
123 /* v0 */ (float)params
->x0
, (float)params
->y1
,
124 /* v1 */ (float)params
->x1
, (float)params
->y1
,
125 /* v2 */ (float)params
->x0
, (float)params
->y0
,
128 float *const vertex_data
= (float *)brw_state_batch(
129 brw
, AUB_TRACE_VERTEX_BUFFER
,
130 sizeof(vertices
), 32,
132 memcpy(vertex_data
, vertices
, sizeof(vertices
));
134 if (params
->wm_prog_data
&& params
->wm_prog_data
->num_varying_inputs
)
135 gen6_blorp_emit_input_varying_data(brw
, params
,
139 /* 3DSTATE_VERTEX_BUFFERS */
140 const int num_buffers
= 1 + (const_data_size
> 0);
141 const int batch_length
= 1 + 4 * num_buffers
;
143 BEGIN_BATCH(batch_length
);
144 OUT_BATCH((_3DSTATE_VERTEX_BUFFERS
<< 16) | (batch_length
- 2));
146 const unsigned blorp_num_vue_elems
= 2;
147 const unsigned stride
= blorp_num_vue_elems
* sizeof(float);
148 EMIT_VERTEX_BUFFER_STATE(brw
, 0 /* buffer_nr */, brw
->batch
.bo
,
149 vertex_offset
, vertex_offset
+ sizeof(vertices
),
150 stride
, 0 /* steprate */);
152 if (const_data_size
) {
153 /* Tell vertex fetcher not to advance the pointer in the buffer when
154 * moving to the next vertex. This will effectively provide the same
155 * data for all the vertices. For flat inputs only the data provided
156 * for the first provoking vertex actually matters.
158 const unsigned stride_zero
= 0;
159 EMIT_VERTEX_BUFFER_STATE(brw
, 1 /* buffer_nr */, brw
->batch
.bo
,
161 const_data_offset
+ const_data_size
,
162 stride_zero
, 0 /* step_rate */);
169 gen6_blorp_emit_vertices(struct brw_context
*brw
,
170 const struct brw_blorp_params
*params
)
172 gen6_blorp_emit_vertex_data(brw
, params
);
174 const unsigned num_varyings
=
175 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
176 const unsigned num_elements
= 2 + num_varyings
;
177 const int batch_length
= 1 + 2 * num_elements
;
179 BEGIN_BATCH(batch_length
);
181 /* 3DSTATE_VERTEX_ELEMENTS
183 * Fetch dwords 0 - 7 from each VUE. See the comments above where
184 * the vertex_bo is filled with data. First element contains dwords
185 * for the VUE header, second the actual position values and the
186 * remaining contain the flat inputs.
189 OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS
<< 16) | (batch_length
- 2));
191 OUT_BATCH(GEN6_VE0_VALID
|
192 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
<< BRW_VE0_FORMAT_SHIFT
|
193 0 << BRW_VE0_SRC_OFFSET_SHIFT
);
194 OUT_BATCH(BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_0_SHIFT
|
195 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_1_SHIFT
|
196 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_2_SHIFT
|
197 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_3_SHIFT
);
199 OUT_BATCH(GEN6_VE0_VALID
|
200 BRW_SURFACEFORMAT_R32G32_FLOAT
<< BRW_VE0_FORMAT_SHIFT
|
201 0 << BRW_VE0_SRC_OFFSET_SHIFT
);
202 OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
203 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_1_SHIFT
|
204 BRW_VE1_COMPONENT_STORE_0
<< BRW_VE1_COMPONENT_2_SHIFT
|
205 BRW_VE1_COMPONENT_STORE_1_FLT
<< BRW_VE1_COMPONENT_3_SHIFT
);
208 for (unsigned i
= 0; i
< num_varyings
; ++i
) {
210 OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT
|
212 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT
<< BRW_VE0_FORMAT_SHIFT
|
213 (i
* 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT
);
214 OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_0_SHIFT
|
215 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_1_SHIFT
|
216 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_2_SHIFT
|
217 BRW_VE1_COMPONENT_STORE_SRC
<< BRW_VE1_COMPONENT_3_SHIFT
);
226 gen6_blorp_emit_blend_state(struct brw_context
*brw
,
227 const struct brw_blorp_params
*params
)
229 uint32_t cc_blend_state_offset
;
231 assume(params
->num_draw_buffers
);
233 const unsigned size
= params
->num_draw_buffers
*
234 sizeof(struct gen6_blend_state
);
235 struct gen6_blend_state
*blend
= (struct gen6_blend_state
*)
236 brw_state_batch(brw
, AUB_TRACE_BLEND_STATE
, size
, 64,
237 &cc_blend_state_offset
);
239 memset(blend
, 0, size
);
241 for (unsigned i
= 0; i
< params
->num_draw_buffers
; ++i
) {
242 blend
[i
].blend1
.pre_blend_clamp_enable
= 1;
243 blend
[i
].blend1
.post_blend_clamp_enable
= 1;
244 blend
[i
].blend1
.clamp_range
= BRW_RENDERTARGET_CLAMPRANGE_FORMAT
;
246 blend
[i
].blend1
.write_disable_r
= params
->color_write_disable
[0];
247 blend
[i
].blend1
.write_disable_g
= params
->color_write_disable
[1];
248 blend
[i
].blend1
.write_disable_b
= params
->color_write_disable
[2];
249 blend
[i
].blend1
.write_disable_a
= params
->color_write_disable
[3];
252 return cc_blend_state_offset
;
258 gen6_blorp_emit_cc_state(struct brw_context
*brw
)
260 uint32_t cc_state_offset
;
262 struct gen6_color_calc_state
*cc
= (struct gen6_color_calc_state
*)
263 brw_state_batch(brw
, AUB_TRACE_CC_STATE
,
264 sizeof(gen6_color_calc_state
), 64,
266 memset(cc
, 0, sizeof(*cc
));
268 return cc_state_offset
;
273 * \param out_offset is relative to
274 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
277 gen6_blorp_emit_depth_stencil_state(struct brw_context
*brw
,
278 const struct brw_blorp_params
*params
)
280 uint32_t depthstencil_offset
;
282 struct gen6_depth_stencil_state
*state
;
283 state
= (struct gen6_depth_stencil_state
*)
284 brw_state_batch(brw
, AUB_TRACE_DEPTH_STENCIL_STATE
,
286 &depthstencil_offset
);
287 memset(state
, 0, sizeof(*state
));
289 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
290 * - 7.5.3.1 Depth Buffer Clear
291 * - 7.5.3.2 Depth Buffer Resolve
292 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
294 state
->ds2
.depth_write_enable
= 1;
295 if (params
->hiz_op
== GEN6_HIZ_OP_DEPTH_RESOLVE
) {
296 state
->ds2
.depth_test_enable
= 1;
297 state
->ds2
.depth_test_func
= BRW_COMPAREFUNCTION_NEVER
;
300 return depthstencil_offset
;
304 /* BINDING_TABLE. See brw_wm_binding_table(). */
306 gen6_blorp_emit_binding_table(struct brw_context
*brw
,
307 uint32_t wm_surf_offset_renderbuffer
,
308 uint32_t wm_surf_offset_texture
)
310 uint32_t wm_bind_bo_offset
;
311 uint32_t *bind
= (uint32_t *)
312 brw_state_batch(brw
, AUB_TRACE_BINDING_TABLE
,
314 BRW_BLORP_NUM_BINDING_TABLE_ENTRIES
,
317 bind
[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX
] =
318 wm_surf_offset_renderbuffer
;
319 bind
[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX
] = wm_surf_offset_texture
;
321 return wm_bind_bo_offset
;
326 * SAMPLER_STATE. See brw_update_sampler_state().
329 gen6_blorp_emit_sampler_state(struct brw_context
*brw
,
330 unsigned tex_filter
, unsigned max_lod
,
331 bool non_normalized_coords
)
333 uint32_t sampler_offset
;
334 uint32_t *sampler_state
= (uint32_t *)
335 brw_state_batch(brw
, AUB_TRACE_SAMPLER_STATE
, 16, 32, &sampler_offset
);
337 unsigned address_rounding
= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN
|
338 BRW_ADDRESS_ROUNDING_ENABLE_V_MIN
|
339 BRW_ADDRESS_ROUNDING_ENABLE_R_MIN
|
340 BRW_ADDRESS_ROUNDING_ENABLE_U_MAG
|
341 BRW_ADDRESS_ROUNDING_ENABLE_V_MAG
|
342 BRW_ADDRESS_ROUNDING_ENABLE_R_MAG
;
344 /* XXX: I don't think that using firstLevel, lastLevel works,
345 * because we always setup the surface state as if firstLevel ==
346 * level zero. Probably have to subtract firstLevel from each of
349 brw_emit_sampler_state(brw
,
352 tex_filter
, /* min filter */
353 tex_filter
, /* mag filter */
357 BRW_TEXCOORDMODE_CLAMP
,
358 BRW_TEXCOORDMODE_CLAMP
,
359 BRW_TEXCOORDMODE_CLAMP
,
363 0, /* shadow function */
364 non_normalized_coords
,
365 0); /* border color offset - unused */
367 return sampler_offset
;
373 * Disable the clipper.
375 * The BLORP op emits a rectangle primitive, which requires clipping to
376 * be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
377 * Section 1.3 "3D Primitives Overview":
379 * Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
380 * Mode should be set to a value other than CLIPMODE_NORMAL.
382 * Also disable perspective divide. This doesn't change the clipper's
383 * output, but does spare a few electrons.
386 gen6_blorp_emit_clip_disable(struct brw_context
*brw
)
389 OUT_BATCH(_3DSTATE_CLIP
<< 16 | (4 - 2));
391 OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE
);
397 /* 3DSTATE_DRAWING_RECTANGLE */
399 gen6_blorp_emit_drawing_rectangle(struct brw_context
*brw
,
400 const struct brw_blorp_params
*params
)
403 OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE
<< 16 | (4 - 2));
405 OUT_BATCH(((MAX2(params
->x1
, params
->x0
) - 1) & 0xffff) |
406 ((MAX2(params
->y1
, params
->y0
) - 1) << 16));
412 /* Once vertex fetcher has written full VUE entries with complete
413 * header the space requirement is as follows per vertex (in bytes):
415 * Header Position Program constants
416 * +--------+------------+-------------------+
417 * | 16 | 16 | n x 16 |
418 * +--------+------------+-------------------+
420 * where 'n' stands for number of varying inputs expressed as vec4s.
422 * The URB size is in turn expressed in 64 bytes (512 bits).
425 gen7_blorp_get_vs_entry_size(const struct brw_blorp_params
*params
)
427 const unsigned num_varyings
=
428 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
429 const unsigned total_needed
= 16 + 16 + num_varyings
* 16;
431 return DIV_ROUND_UP(total_needed
, 64);
439 * If the 3DSTATE_URB_VS is emitted, than the others must be also.
440 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
442 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
443 * programmed in order for the programming of this state to be
447 gen7_blorp_emit_urb_config(struct brw_context
*brw
,
448 const struct brw_blorp_params
*params
)
450 const unsigned vs_entry_size
= gen7_blorp_get_vs_entry_size(params
);
452 if (!(brw
->ctx
.NewDriverState
& (BRW_NEW_CONTEXT
| BRW_NEW_URB_SIZE
)) &&
453 brw
->urb
.vsize
>= vs_entry_size
)
456 brw
->ctx
.NewDriverState
|= BRW_NEW_URB_SIZE
;
458 gen7_upload_urb(brw
, vs_entry_size
, false, false);
462 /* 3DSTATE_BLEND_STATE_POINTERS */
464 gen7_blorp_emit_blend_state_pointer(struct brw_context
*brw
,
465 uint32_t cc_blend_state_offset
)
468 OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS
<< 16 | (2 - 2));
469 OUT_BATCH(cc_blend_state_offset
| 1);
474 /* 3DSTATE_CC_STATE_POINTERS */
476 gen7_blorp_emit_cc_state_pointer(struct brw_context
*brw
,
477 uint32_t cc_state_offset
)
480 OUT_BATCH(_3DSTATE_CC_STATE_POINTERS
<< 16 | (2 - 2));
481 OUT_BATCH(cc_state_offset
| 1);
486 gen7_blorp_emit_cc_viewport(struct brw_context
*brw
)
488 struct brw_cc_viewport
*ccv
;
489 uint32_t cc_vp_offset
;
491 ccv
= (struct brw_cc_viewport
*)brw_state_batch(brw
, AUB_TRACE_CC_VP_STATE
,
494 ccv
->min_depth
= 0.0;
495 ccv
->max_depth
= 1.0;
498 OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC
<< 16 | (2 - 2));
499 OUT_BATCH(cc_vp_offset
);
506 * Disable the tesselation engine.
509 gen7_blorp_emit_te_disable(struct brw_context
*brw
)
512 OUT_BATCH(_3DSTATE_TE
<< 16 | (4 - 2));
521 gen7_blorp_emit_binding_table_pointers_ps(struct brw_context
*brw
,
522 uint32_t wm_bind_bo_offset
)
525 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS
<< 16 | (2 - 2));
526 OUT_BATCH(wm_bind_bo_offset
);
532 gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context
*brw
,
533 uint32_t sampler_offset
)
536 OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS
<< 16 | (2 - 2));
537 OUT_BATCH(sampler_offset
);
542 /* 3DSTATE_CLEAR_PARAMS
544 * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
545 * 3DSTATE_CLEAR_PARAMS:
546 * 3DSTATE_CLEAR_PARAMS must always be programmed in the along
547 * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
548 * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
551 gen7_blorp_emit_clear_params(struct brw_context
*brw
,
552 const struct brw_blorp_params
*params
)
555 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS
<< 16 | (3 - 2));
556 OUT_BATCH(params
->depth
.clear_color
.u32
[0]);
557 OUT_BATCH(GEN7_DEPTH_CLEAR_VALID
);
564 gen7_blorp_emit_primitive(struct brw_context
*brw
,
565 const struct brw_blorp_params
*params
)
568 OUT_BATCH(CMD_3D_PRIM
<< 16 | (7 - 2));
569 OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
|
571 OUT_BATCH(3); /* vertex count per instance */
573 OUT_BATCH(params
->num_layers
); /* instance count */