2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
29 #include "brw_context.h"
30 #include "brw_state.h"
32 #include "blorp_priv.h"
34 #include "genxml/gen_macros.h"
/* Reserve room for n entries in the batch buffer.
 *
 * Calls intel_batchbuffer_begin() for RENDER_RING, remembers the current
 * brw->batch.map_next in `map` (a uint32_t pointer), moves map_next forward
 * by n and then calls intel_batchbuffer_advance().
 *
 * NOTE(review): the embedded source numbering jumps (37 -> 39, 42 -> 46), so
 * this listing appears to have lost lines -- the return type, the braces and
 * presumably a statement returning `map` to the caller. Confirm against the
 * pristine file before relying on this text.
 */
37 blorp_emit_dwords(struct brw_context
*brw
, unsigned n
)
39 intel_batchbuffer_begin(brw
, n
, RENDER_RING
);
/* Start of the reserved region; map_next is bumped past it just below. */
40 uint32_t *map
= brw
->batch
.map_next
;
41 brw
->batch
.map_next
+= n
;
42 intel_batchbuffer_advance(brw
);
/* Address descriptor used by this file: it is the type that
 * __gen_address_type is defined to and the type of the `address` argument of
 * blorp_emit_reloc() and __gen_combine_address().
 *
 * NOTE(review): only read_domains and write_domain are visible here, but
 * other code in this file also accesses .buffer and .offset; those member
 * declarations (and the closing brace) seem to be missing from this listing.
 */
46 struct blorp_address
{
48 uint32_t read_domains
;
49 uint32_t write_domain
;
/* Emit a relocation for `address` at `location` inside the batch.
 *
 * brw       context whose batch is being written
 * location  pointer into the mapped batch where the address is stored
 * address   buffer/offset pair to relocate against
 * delta     extra byte offset added to address.offset
 *
 * Two return paths are visible: one through intel_batchbuffer_reloc64() and
 * one through intel_batchbuffer_reloc(). Both receive address.buffer, the
 * computed batch offset and address.offset + delta.
 *
 * NOTE(review): the condition that selects between the two paths, and some
 * of the call arguments (the embedded numbering skips 60-61 and 65-66), are
 * not visible in this listing -- presumably a hardware-generation check and
 * the read/write domains; confirm against the pristine file.
 */
54 blorp_emit_reloc(struct brw_context
*brw
, void *location
,
55 struct blorp_address address
, uint32_t delta
)
/* Byte distance of `location` from the start of the mapped batch. */
57 uint32_t offset
= (char *)location
- (char *)brw
->batch
.map
;
59 return intel_batchbuffer_reloc64(brw
, address
.buffer
, offset
,
62 address
.offset
+ delta
);
64 return intel_batchbuffer_reloc(brw
, address
.buffer
, offset
,
67 address
.offset
+ delta
);
/* Type hooks: the address type is struct blorp_address and the user-data
 * type is struct brw_context. Presumably these are consumed by
 * genxml/genX_pack.h, which this file includes after defining them -- TODO
 * confirm against that header.
 */
71 #define __gen_address_type struct blorp_address
72 #define __gen_user_data struct brw_context
/* Turn an address descriptor plus delta into the value to store at
 * `location`.
 *
 * When address.buffer is NULL no relocation is needed and the result is just
 * address.offset + delta. Otherwise the work is delegated to
 * blorp_emit_reloc() and its result is returned.
 *
 * NOTE(review): the return type, braces and the `else` separating the two
 * returns are not visible in this listing.
 */
75 __gen_combine_address(struct brw_context
*brw
, void *location
,
76 struct blorp_address address
, uint32_t delta
)
78 if (address
.buffer
== NULL
) {
79 return address
.offset
+ delta
;
81 return blorp_emit_reloc(brw
, location
, address
, delta
);
85 #include "genxml/genX_pack.h"
/* Token-pasting helpers: append _length, _header or _pack to a command name.
 *
 * blorp_emit(brw, cmd, name) opens a `for` statement that
 *   - declares `struct cmd name`, initialised from the command's _header,
 *   - reserves _blorp_cmd_length(cmd) entries with blorp_emit_dwords() and
 *     keeps the result in _dst,
 *   - runs its body while _dst is non-NULL (hinted as the likely case), and
 *   - in its iteration expression packs `name` into _dst with the command's
 *     _pack function.
 * The statement that follows the macro invocation is the loop body, which is
 * where callers fill in the fields of `name`.
 *
 * NOTE(review): the visible text ends on a trailing comma, so the last line
 * of the macro is missing from this listing (presumably it resets _dst so
 * the body runs once -- confirm against the pristine file).
 */
87 #define _blorp_cmd_length(cmd) cmd ## _length
88 #define _blorp_cmd_header(cmd) cmd ## _header
89 #define _blorp_cmd_pack(cmd) cmd ## _pack
91 #define blorp_emit(brw, cmd, name) \
92 for (struct cmd name = { _blorp_cmd_header(cmd) }, \
93 *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd)); \
94 __builtin_expect(_dst != NULL, 1); \
95 _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name), \
/* Emit the 3DSTATE_SF packet for a blorp operation.
 *
 * brw     context whose batch receives the packet
 * params  blorp parameters; wm_prog_data and dst.surf.samples are read
 *
 * Front and back fill modes are set to solid, the multisample rasterization
 * mode follows the destination sample count, and the URB read offset is
 * BRW_SF_URB_ENTRY_READ_OFFSET.
 *
 * NOTE(review): NumberofSFOutputAttributes and VertexURBEntryReadLength are
 * assigned twice below, first from prog_data and then to 0 and 1. The branch
 * that separates the two groups is not visible in this listing (presumably a
 * test on whether prog_data is NULL -- confirm against the pristine file).
 */
99 blorp_emit_sf_config(struct brw_context
*brw
,
100 const struct brw_blorp_params
*params
)
102 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
106 * Disable ViewportTransformEnable (dw2.1)
108 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
109 * Primitives Overview":
110 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
111 * use of screen- space coordinates).
113 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
114 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
116 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
117 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
118 * SOLID: Any triangle or rectangle object found to be front-facing
119 * is rendered as a solid object. This setting is required when
120 * (rendering rectangle (RECTLIST) objects.
122 blorp_emit(brw
, GENX(3DSTATE_SF
), sf
) {
123 sf
.FrontFaceFillMode
= FILL_MODE_SOLID
;
124 sf
.BackFaceFillMode
= FILL_MODE_SOLID
;
/* More than one destination sample selects the ON_PATTERN mode. */
126 sf
.MultisampleRasterizationMode
= params
->dst
.surf
.samples
> 1 ?
127 MSRASTMODE_ON_PATTERN
: MSRASTMODE_OFF_PIXEL
;
129 sf
.VertexURBEntryReadOffset
= BRW_SF_URB_ENTRY_READ_OFFSET
;
/* Attribute setup taken from the WM program data. */
131 sf
.NumberofSFOutputAttributes
= prog_data
->num_varying_inputs
;
132 sf
.VertexURBEntryReadLength
= brw_blorp_get_urb_length(prog_data
);
133 sf
.ConstantInterpolationEnable
= prog_data
->flat_inputs
;
/* Fixed values: no output attributes, read length of 1. */
135 sf
.NumberofSFOutputAttributes
= 0;
136 sf
.VertexURBEntryReadLength
= 1;
/* Emit the 3DSTATE_WM packet for a blorp operation.
 *
 * brw     context; max_wm_threads is read
 * params  blorp parameters; hiz_op, wm_prog_data, wm_prog_kernel, src.bo and
 *         dst.surf.samples are read
 *
 * The packet always gets MaximumNumberofThreads = max_wm_threads - 1. One of
 * the depth clear / depth resolve / HiZ resolve flags is chosen from
 * params->hiz_op. Thread dispatch, the GRF start registers, kernel start
 * pointers, 8/16-pixel dispatch enables and the attribute count come from
 * the WM program. A source buffer turns on one sampler-count unit and pixel
 * kill. The multisample modes depend on the destination sample count.
 *
 * NOTE(review): prog_data is NULL-checked where MultisampleDispatchMode is
 * chosen, so it can be NULL, yet no guard around the other prog_data reads
 * is visible. The `break` statements, the `default:` label and several
 * braces of the switch are also not visible. This listing appears to have
 * dropped those lines -- confirm against the pristine file.
 */
142 blorp_emit_wm_config(struct brw_context
*brw
,
143 const struct brw_blorp_params
*params
)
145 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
147 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
148 * nonzero to prevent the GPU from hanging. While the documentation doesn't
149 * mention this explicitly, it notes that the valid range for the field is
150 * [1,39] = [2,40] threads, which excludes zero.
152 * To be safe (and to minimize extraneous code) we go ahead and fully
153 * configure the WM state whether or not there is a WM program.
155 blorp_emit(brw
, GENX(3DSTATE_WM
), wm
) {
156 wm
.MaximumNumberofThreads
= brw
->max_wm_threads
- 1;
158 switch (params
->hiz_op
) {
159 case GEN6_HIZ_OP_DEPTH_CLEAR
:
160 wm
.DepthBufferClear
= true;
162 case GEN6_HIZ_OP_DEPTH_RESOLVE
:
163 wm
.DepthBufferResolveEnable
= true;
165 case GEN6_HIZ_OP_HIZ_RESOLVE
:
166 wm
.HierarchicalDepthBufferResolveEnable
= true;
168 case GEN6_HIZ_OP_NONE
:
171 unreachable("not reached");
175 wm
.ThreadDispatchEnable
= true;
177 wm
.DispatchGRFStartRegisterforConstantSetupData0
=
178 prog_data
->first_curbe_grf_0
;
179 wm
.DispatchGRFStartRegisterforConstantSetupData2
=
180 prog_data
->first_curbe_grf_2
;
182 wm
.KernelStartPointer0
= params
->wm_prog_kernel
;
183 wm
.KernelStartPointer2
=
184 params
->wm_prog_kernel
+ prog_data
->ksp_offset_2
;
186 wm
._8PixelDispatchEnable
= prog_data
->dispatch_8
;
187 wm
._16PixelDispatchEnable
= prog_data
->dispatch_16
;
189 wm
.NumberofSFOutputAttributes
= prog_data
->num_varying_inputs
;
192 if (params
->src
.bo
) {
193 wm
.SamplerCount
= 1; /* Up to 4 samplers */
194 wm
.PixelShaderKillPixel
= true; /* TODO: temporarily smash on */
/* Multisampled destination: pattern rasterization, with per-sample dispatch
 * only when the program asks for it; otherwise the second pair of
 * assignments below applies.
 */
197 if (params
->dst
.surf
.samples
> 1) {
198 wm
.MultisampleRasterizationMode
= MSRASTMODE_ON_PATTERN
;
199 wm
.MultisampleDispatchMode
=
200 (prog_data
&& prog_data
->persample_msaa_dispatch
) ?
201 MSDISPMODE_PERSAMPLE
: MSDISPMODE_PERPIXEL
;
203 wm
.MultisampleRasterizationMode
= MSRASTMODE_OFF_PIXEL
;
204 wm
.MultisampleDispatchMode
= MSDISPMODE_PERSAMPLE
;
/* Program the depth-related buffer packets from params->depth.
 *
 * brw     context whose batch receives the packets
 * params  blorp parameters; the depth surface, view, bo/offset, aux surface
 *         and aux bo/offset plus depth_format are read
 *
 * Calls brw_emit_depth_stall_flushes() first, then emits, in order,
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER and a
 * 3DSTATE_STENCIL_BUFFER on which no fields are set.
 *
 * NOTE(review): the `break` statements of the surface-dimension switch and
 * the closing braces are not visible in this listing -- confirm against the
 * pristine file.
 */
211 blorp_emit_depth_stencil_config(struct brw_context
*brw
,
212 const struct brw_blorp_params
*params
)
214 brw_emit_depth_stall_flushes(brw
);
216 blorp_emit(brw
, GENX(3DSTATE_DEPTH_BUFFER
), db
) {
/* Map the ISL surface dimensionality onto the packet's surface type. */
217 switch (params
->depth
.surf
.dim
) {
218 case ISL_SURF_DIM_1D
:
219 db
.SurfaceType
= SURFTYPE_1D
;
221 case ISL_SURF_DIM_2D
:
222 db
.SurfaceType
= SURFTYPE_2D
;
224 case ISL_SURF_DIM_3D
:
225 db
.SurfaceType
= SURFTYPE_3D
;
229 db
.SurfaceFormat
= params
->depth_format
;
231 db
.TiledSurface
= true;
232 db
.TileWalk
= TILEWALK_YMAJOR
;
233 db
.MIPMapLayoutMode
= MIPLAYOUT_BELOW
;
235 db
.HierarchicalDepthBufferEnable
= true;
236 db
.SeparateStencilBufferEnable
= true;
/* Width, height, depth and pitch are all written as (value - 1). */
238 db
.Width
= params
->depth
.surf
.logical_level0_px
.width
- 1;
239 db
.Height
= params
->depth
.surf
.logical_level0_px
.height
- 1;
/* Depth and view extent share one value: the larger of the level-0 depth
 * and the array length, minus one.
 */
240 db
.RenderTargetViewExtent
= db
.Depth
=
241 MAX2(params
->depth
.surf
.logical_level0_px
.depth
,
242 params
->depth
.surf
.logical_level0_px
.array_len
) - 1;
244 db
.LOD
= params
->depth
.view
.base_level
;
245 db
.MinimumArrayElement
= params
->depth
.view
.base_array_layer
;
247 db
.SurfacePitch
= params
->depth
.surf
.row_pitch
- 1;
/* The depth bo is both read and written in the render domain. */
248 db
.SurfaceBaseAddress
= (struct blorp_address
) {
249 .buffer
= params
->depth
.bo
,
250 .read_domains
= I915_GEM_DOMAIN_RENDER
,
251 .write_domain
= I915_GEM_DOMAIN_RENDER
,
252 .offset
= params
->depth
.offset
,
/* HiZ buffer: pitch and address come from the depth aux surface and bo. */
256 blorp_emit(brw
, GENX(3DSTATE_HIER_DEPTH_BUFFER
), hiz
) {
257 hiz
.SurfacePitch
= params
->depth
.aux_surf
.row_pitch
- 1;
258 hiz
.SurfaceBaseAddress
= (struct blorp_address
) {
259 .buffer
= params
->depth
.aux_bo
,
260 .read_domains
= I915_GEM_DOMAIN_RENDER
,
261 .write_domain
= I915_GEM_DOMAIN_RENDER
,
262 .offset
= params
->depth
.aux_offset
,
/* Stencil buffer packet with an empty body: no fields are assigned. */
266 blorp_emit(brw
, GENX(3DSTATE_STENCIL_BUFFER
), sb
);
/* Build and upload the BLEND_STATE used by a blorp operation.
 *
 * brw     context used for the state allocation
 * params  blorp parameters; num_draw_buffers and color_write_disable[0..3]
 *         are read
 *
 * A zeroed BLEND_STATE is filled in for the first params->num_draw_buffers
 * entries: pre- and post-blend colour clamping are enabled with the
 * COLORCLAMP_RTFORMAT range, and the per-channel write disables are copied
 * from params->color_write_disable in R, G, B, A order. The structure is
 * then packed into memory obtained from brw_state_batch().
 *
 * NOTE(review): the declaration of `offset`, the return statement and the
 * return type are not visible in this listing; the caller stores this
 * function's result as the blend state offset, so it presumably returns
 * `offset` -- confirm against the pristine file. The literal 64 passed to
 * brw_state_batch() is presumably an alignment -- TODO confirm.
 */
270 blorp_emit_blend_state(struct brw_context
*brw
,
271 const struct brw_blorp_params
*params
)
273 struct GENX(BLEND_STATE
) blend
;
274 memset(&blend
, 0, sizeof(blend
));
276 for (unsigned i
= 0; i
< params
->num_draw_buffers
; ++i
) {
277 blend
.Entry
[i
].PreBlendColorClampEnable
= true;
278 blend
.Entry
[i
].PostBlendColorClampEnable
= true;
279 blend
.Entry
[i
].ColorClampRange
= COLORCLAMP_RTFORMAT
;
/* The same four write-disable flags are applied to every entry. */
281 blend
.Entry
[i
].WriteDisableRed
= params
->color_write_disable
[0];
282 blend
.Entry
[i
].WriteDisableGreen
= params
->color_write_disable
[1];
283 blend
.Entry
[i
].WriteDisableBlue
= params
->color_write_disable
[2];
284 blend
.Entry
[i
].WriteDisableAlpha
= params
->color_write_disable
[3];
/* Size is the packed length times 4 bytes. */
288 void *state
= brw_state_batch(brw
, AUB_TRACE_BLEND_STATE
,
289 GENX(BLEND_STATE_length
) * 4, 64, &offset
);
290 GENX(BLEND_STATE_pack
)(NULL
, state
, &blend
);
/* Upload an all-zero COLOR_CALC_STATE.
 *
 * brw     context used for the state allocation
 * params  not read by any of the visible code
 *
 * Allocates GENX(COLOR_CALC_STATE_length) * 4 bytes with brw_state_batch()
 * and clears them with memset(); nothing is packed.
 *
 * NOTE(review): the declaration of `offset`, the return statement and the
 * return type are not visible in this listing; the caller stores this
 * function's result as the colour calc state offset, so it presumably
 * returns `offset` -- confirm against the pristine file.
 */
296 blorp_emit_color_calc_state(struct brw_context
*brw
,
297 const struct brw_blorp_params
*params
)
300 void *state
= brw_state_batch(brw
, AUB_TRACE_CC_STATE
,
301 GENX(COLOR_CALC_STATE_length
) * 4, 64, &offset
);
302 memset(state
, 0, GENX(COLOR_CALC_STATE_length
) * 4);
/* Build and upload the DEPTH_STENCIL_STATE for a blorp operation.
 *
 * brw     context used for the state allocation
 * params  blorp parameters; hiz_op is read
 *
 * Depth buffer writes are always enabled. For GEN6_HIZ_OP_DEPTH_RESOLVE the
 * depth test is additionally enabled with COMPAREFUNCTION_NEVER. The
 * structure is packed into memory obtained from brw_state_batch().
 *
 * NOTE(review): the end of the initialiser, the declaration and passing of
 * the offset argument, the return statement and the return type are not
 * visible in this listing. The caller stores this function's result as the
 * depth stencil state offset -- confirm the details against the pristine
 * file.
 */
308 blorp_emit_depth_stencil_state(struct brw_context
*brw
,
309 const struct brw_blorp_params
*params
)
311 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
312 * - 7.5.3.1 Depth Buffer Clear
313 * - 7.5.3.2 Depth Buffer Resolve
314 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
316 struct GENX(DEPTH_STENCIL_STATE
) ds
= {
317 .DepthBufferWriteEnable
= true,
320 if (params
->hiz_op
== GEN6_HIZ_OP_DEPTH_RESOLVE
) {
321 ds
.DepthTestEnable
= true;
322 ds
.DepthTestFunction
= COMPAREFUNCTION_NEVER
;
326 void *state
= brw_state_batch(brw
, AUB_TRACE_DEPTH_STENCIL_STATE
,
327 GENX(DEPTH_STENCIL_STATE_length
) * 4, 64,
329 GENX(DEPTH_STENCIL_STATE_pack
)(NULL
, state
, &ds
);
// Build one SAMPLER_STATE, upload it and point the pipeline at it.
//
// brw     context used for the state allocation and the batch
// params  not read by any of the visible code
//
// The sampler uses linear min/mag filtering with no mip filtering, clamp
// addressing on X, Y and Z, all six address rounding enables, and
// non-normalized coordinates. It is packed into memory obtained from
// brw_state_batch(). 3DSTATE_SAMPLER_STATE_POINTERS is then emitted with the
// VS, GS and PS change flags set; only PointertoPSSamplerState is assigned.
//
// NOTE(review): the declaration of `offset`, the end of the initialiser and
// the braces are not visible in this listing, and the embedded numbering
// skips 342-343, so initialiser fields may be missing too. Confirm against
// the pristine file.
335 blorp_emit_sampler_state(struct brw_context
*brw
,
336 const struct brw_blorp_params
*params
)
338 struct GENX(SAMPLER_STATE
) sampler
= {
339 .MipModeFilter
= MIPFILTER_NONE
,
340 .MagModeFilter
= MAPFILTER_LINEAR
,
341 .MinModeFilter
= MAPFILTER_LINEAR
,
344 .TCXAddressControlMode
= TCM_CLAMP
,
345 .TCYAddressControlMode
= TCM_CLAMP
,
346 .TCZAddressControlMode
= TCM_CLAMP
,
347 .MaximumAnisotropy
= RATIO21
,
348 .RAddressMinFilterRoundingEnable
= true,
349 .RAddressMagFilterRoundingEnable
= true,
350 .VAddressMinFilterRoundingEnable
= true,
351 .VAddressMagFilterRoundingEnable
= true,
352 .UAddressMinFilterRoundingEnable
= true,
353 .UAddressMagFilterRoundingEnable
= true,
354 .NonnormalizedCoordinateEnable
= true,
358 void *state
= brw_state_batch(brw
, AUB_TRACE_SAMPLER_STATE
,
359 GENX(SAMPLER_STATE_length
) * 4, 32, &offset
);
360 GENX(SAMPLER_STATE_pack
)(NULL
, state
, &sampler
);
362 blorp_emit(brw
, GENX(3DSTATE_SAMPLER_STATE_POINTERS
), ssp
) {
363 ssp
.VSSamplerStateChange
= true;
364 ssp
.GSSamplerStateChange
= true;
365 ssp
.PSSamplerStateChange
= true;
366 ssp
.PointertoPSSamplerState
= offset
;
370 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
/* Upload a CC_VIEWPORT and emit 3DSTATE_VIEWPORT_STATE_POINTERS for it.
 *
 * brw     context used for the state allocation and the batch
 * params  not read by any of the visible code
 *
 * Memory for one CC_VIEWPORT is obtained from brw_state_batch() and filled
 * by GENX(CC_VIEWPORT_pack) from a compound literal. The pointers packet
 * then sets only the CC viewport change flag and PointertoCC_VIEWPORT.
 *
 * NOTE(review): the fields of the CC_VIEWPORT compound literal and the
 * brw_state_batch() argument that receives cc_vp_offset are not visible in
 * this listing (the embedded numbering skips 379-380 and 383-386) -- confirm
 * against the pristine file.
 */
372 blorp_emit_viewport_state(struct brw_context
*brw
,
373 const struct brw_blorp_params
*params
)
375 uint32_t cc_vp_offset
;
377 void *state
= brw_state_batch(brw
, AUB_TRACE_CC_VP_STATE
,
378 GENX(CC_VIEWPORT_length
) * 4, 32,
381 GENX(CC_VIEWPORT_pack
)(brw
, state
,
382 &(struct GENX(CC_VIEWPORT
)) {
/* Only the CC viewport pointer is flagged as changed. */
387 blorp_emit(brw
, GENX(3DSTATE_VIEWPORT_STATE_POINTERS
), vsp
) {
388 vsp
.CCViewportStateChange
= true;
389 vsp
.PointertoCC_VIEWPORT
= cc_vp_offset
;
395 * \brief Execute a blit or render pass operation.
397 * To execute the operation, this function manually constructs and emits a
398 * batch to draw a rectangle primitive. The batchbuffer is flushed before
399 * constructing and after emitting the batch.
401 * This function alters no GL state.
/* Entry point: emit the full packet sequence for one blorp operation.
 *
 * brw     context whose batch receives the packets
 * params  description of the operation (program data, src/dst/depth
 *         surfaces, rectangle, layer count)
 *
 * Visible order of work:
 *   1. brw_emit_post_sync_nonzero_flush(), brw_upload_state_base_address()
 *      and gen6_blorp_emit_vertices().
 *   2. 3DSTATE_URB with VSNumberofURBEntries = brw->urb.max_vs_entries.
 *   3. Blend, colour-calc and depth-stencil state, followed by
 *      3DSTATE_CC_STATE_POINTERS carrying the three offsets.
 *   4. 3DSTATE_CONSTANT_VS/GS/PS with no fields set.
 *   5. Destination (and, with a source bo, source) surface state, the
 *      binding table and 3DSTATE_BINDING_TABLE_POINTERS.
 *   6. Sampler state, multisample state and 3DSTATE_SAMPLE_MASK with
 *      (1 << samples) - 1.
 *   7. 3DSTATE_VS and 3DSTATE_GS with no fields set, and 3DSTATE_CLIP with
 *      perspective divide disabled.
 *   8. SF, WM and viewport configuration.
 *   9. Either the depth/stencil buffer configuration (params->depth.bo set)
 *      or a null D32_FLOAT depth buffer with field-less HiZ and stencil
 *      buffer packets.
 *  10. 3DSTATE_CLEAR_PARAMS, 3DSTATE_DRAWING_RECTANGLE and a 3-vertex
 *      RECTLIST 3DPRIMITIVE with params->num_layers instances.
 *
 * NOTE(review): this listing appears to have lost lines -- closing braces,
 * `else` keywords and at least one assignment are not visible. Items to
 * confirm against the pristine file:
 *   - depth_stencil_state_offset has no initialiser; whether its assignment
 *     sits inside or after the wm_prog_data test cannot be told from here.
 *   - wm_bind_bo_offset is initialised to 0 and no visible line stores the
 *     result of gen6_blorp_emit_binding_table() into it.
 *   - the first-argument text of the two brw_blorp_emit_surface_state()
 *     calls contains a pilcrow character, which looks like a mis-decoded
 *     address-of on `params`.
 *   - which of the later packets are conditional on params->wm_prog_data.
 */
404 genX(blorp_exec
)(struct brw_context
*brw
,
405 const struct brw_blorp_params
*params
)
407 uint32_t blend_state_offset
= 0;
408 uint32_t color_calc_state_offset
= 0;
409 uint32_t depth_stencil_state_offset
;
410 uint32_t wm_bind_bo_offset
= 0;
412 /* Emit workaround flushes when we switch from drawing to blorping. */
413 brw_emit_post_sync_nonzero_flush(brw
);
415 brw_upload_state_base_address(brw
);
417 gen6_blorp_emit_vertices(brw
, params
);
421 * Assign the entire URB to the VS. Even though the VS disabled, URB space
422 * is still needed because the clipper loads the VUE's from the URB. From
423 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
424 * Dword 1.15:0 "VS Number of URB Entries":
425 * This field is always used (even if VS Function Enable is DISABLED).
427 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
428 * safely ignore it because this batch contains only one draw call.
429 * Because of URB corruption caused by allocating a previous GS unit
430 * URB entry to the VS unit, software is required to send a “GS NULL
431 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
432 * plus a dummy DRAW call before any case where VS will be taking over
435 blorp_emit(brw
, GENX(3DSTATE_URB
), urb
) {
436 urb
.VSNumberofURBEntries
= brw
->urb
.max_vs_entries
;
439 if (params
->wm_prog_data
) {
440 blend_state_offset
= blorp_emit_blend_state(brw
, params
);
441 color_calc_state_offset
= blorp_emit_color_calc_state(brw
, params
);
443 depth_stencil_state_offset
= blorp_emit_depth_stencil_state(brw
, params
);
445 /* 3DSTATE_CC_STATE_POINTERS
447 * The pointer offsets are relative to
448 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
450 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
452 blorp_emit(brw
, GENX(3DSTATE_CC_STATE_POINTERS
), cc
) {
453 cc
.BLEND_STATEChange
= true;
454 cc
.COLOR_CALC_STATEChange
= true;
455 cc
.DEPTH_STENCIL_STATEChange
= true;
456 cc
.PointertoBLEND_STATE
= blend_state_offset
;
457 cc
.PointertoCOLOR_CALC_STATE
= color_calc_state_offset
;
458 cc
.PointertoDEPTH_STENCIL_STATE
= depth_stencil_state_offset
;
461 blorp_emit(brw
, GENX(3DSTATE_CONSTANT_VS
), vs
);
462 blorp_emit(brw
, GENX(3DSTATE_CONSTANT_GS
), gs
);
463 blorp_emit(brw
, GENX(3DSTATE_CONSTANT_PS
), ps
);
465 if (params
->wm_prog_data
) {
466 uint32_t wm_surf_offset_renderbuffer
;
467 uint32_t wm_surf_offset_texture
= 0;
469 wm_surf_offset_renderbuffer
=
470 brw_blorp_emit_surface_state(brw
, ¶ms
->dst
,
471 I915_GEM_DOMAIN_RENDER
,
472 I915_GEM_DOMAIN_RENDER
, true);
473 if (params
->src
.bo
) {
474 wm_surf_offset_texture
=
475 brw_blorp_emit_surface_state(brw
, ¶ms
->src
,
476 I915_GEM_DOMAIN_SAMPLER
, 0, false);
479 gen6_blorp_emit_binding_table(brw
,
480 wm_surf_offset_renderbuffer
,
481 wm_surf_offset_texture
);
483 blorp_emit(brw
, GENX(3DSTATE_BINDING_TABLE_POINTERS
), bt
) {
484 bt
.PSBindingTableChange
= true;
485 bt
.PointertoPSBindingTable
= wm_bind_bo_offset
;
490 blorp_emit_sampler_state(brw
, params
);
492 gen6_emit_3dstate_multisample(brw
, params
->dst
.surf
.samples
);
494 blorp_emit(brw
, GENX(3DSTATE_SAMPLE_MASK
), mask
) {
495 mask
.SampleMask
= (1 << params
->dst
.surf
.samples
) - 1;
498 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
499 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
501 * [DevSNB] A pipeline flush must be programmed prior to a
502 * 3DSTATE_VS command that causes the VS Function Enable to
503 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
504 * command with CS stall bit set and a post sync operation.
506 * We've already done one at the start of the BLORP operation.
508 blorp_emit(brw
, GENX(3DSTATE_VS
), vs
);
509 blorp_emit(brw
, GENX(3DSTATE_GS
), gs
);
511 blorp_emit(brw
, GENX(3DSTATE_CLIP
), clip
) {
512 clip
.PerspectiveDivideDisable
= true;
515 blorp_emit_sf_config(brw
, params
);
516 blorp_emit_wm_config(brw
, params
);
518 blorp_emit_viewport_state(brw
, params
);
520 if (params
->depth
.bo
) {
521 blorp_emit_depth_stencil_config(brw
, params
);
523 brw_emit_depth_stall_flushes(brw
);
525 blorp_emit(brw
, GENX(3DSTATE_DEPTH_BUFFER
), db
) {
526 db
.SurfaceType
= SURFTYPE_NULL
;
527 db
.SurfaceFormat
= D32_FLOAT
;
529 blorp_emit(brw
, GENX(3DSTATE_HIER_DEPTH_BUFFER
), hiz
);
530 blorp_emit(brw
, GENX(3DSTATE_STENCIL_BUFFER
), sb
);
533 /* 3DSTATE_CLEAR_PARAMS
535 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
536 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
537 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
539 blorp_emit(brw
, GENX(3DSTATE_CLEAR_PARAMS
), clear
) {
540 clear
.DepthClearValueValid
= true;
541 clear
.DepthClearValue
= params
->depth
.clear_color
.u32
[0];
544 blorp_emit(brw
, GENX(3DSTATE_DRAWING_RECTANGLE
), rect
) {
545 rect
.ClippedDrawingRectangleXMax
= MAX2(params
->x1
, params
->x0
) - 1;
546 rect
.ClippedDrawingRectangleYMax
= MAX2(params
->y1
, params
->y0
) - 1;
549 blorp_emit(brw
, GENX(3DPRIMITIVE
), prim
) {
550 prim
.VertexAccessType
= SEQUENTIAL
;
551 prim
.PrimitiveTopologyType
= _3DPRIM_RECTLIST
;
552 prim
.VertexCountPerInstance
= 3;
553 prim
.InstanceCount
= params
->num_layers
;