2 * Copyright © 2011 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
29 #include "brw_context.h"
30 #include "brw_state.h"
32 #include "brw_blorp.h"
34 /* Once vertex fetcher has written full VUE entries with complete
35 * header the space requirement is as follows per vertex (in bytes):
37 * Header Position Program constants
38 * +--------+------------+-------------------+
39 * | 16 | 16 | n x 16 |
40 * +--------+------------+-------------------+
42 * where 'n' stands for number of varying inputs expressed as vec4s.
44 * The URB size is in turn expressed in 64 bytes (512 bits).
/* Returns the per-vertex URB entry size, expressed in 64-byte units.
 *
 * The byte total is 16 + 16 + 16 * num_varyings, where num_varyings is
 * params->wm_prog_data->num_varying_inputs, or 0 when params->wm_prog_data
 * is NULL. The total is rounded up with DIV_ROUND_UP(total_needed, 64).
 *
 * NOTE(review): the return type and the braces of this definition are not
 * visible in this listing.
 */
47 gen7_blorp_get_vs_entry_size(const struct brw_blorp_params
*params
)
49 const unsigned num_varyings
=
50 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
51 const unsigned total_needed
= 16 + 16 + num_varyings
* 16;
53 return DIV_ROUND_UP(total_needed
, 64);
61 * If the 3DSTATE_URB_VS is emitted, then the others must be also.
62 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
64 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
65 * programmed in order for the programming of this state to be
/* Uploads a URB configuration sized for the blorp vertex entries.
 *
 * vs_entry_size comes from gen7_blorp_get_vs_entry_size(params). The
 * condition below is true when neither BRW_NEW_CONTEXT nor BRW_NEW_URB_SIZE
 * is set in brw->ctx.NewDriverState and brw->urb.vsize is already at least
 * vs_entry_size.
 * NOTE(review): the statement guarded by that condition is not visible in
 * this listing; presumably it is an early return -- confirm.
 *
 * Afterwards BRW_NEW_URB_SIZE is set in NewDriverState and
 * gen7_upload_urb(brw, vs_entry_size, false, false) is called.
 */
69 gen7_blorp_emit_urb_config(struct brw_context
*brw
,
70 const struct brw_blorp_params
*params
)
72 const unsigned vs_entry_size
= gen7_blorp_get_vs_entry_size(params
);
74 if (!(brw
->ctx
.NewDriverState
& (BRW_NEW_CONTEXT
| BRW_NEW_URB_SIZE
)) &&
75 brw
->urb
.vsize
>= vs_entry_size
)
78 brw
->ctx
.NewDriverState
|= BRW_NEW_URB_SIZE
;
80 gen7_upload_urb(brw
, vs_entry_size
, false, false);
84 /* 3DSTATE_BLEND_STATE_POINTERS */
/* Emits a two-dword 3DSTATE_BLEND_STATE_POINTERS packet.
 *
 * The first dword is the opcode shifted left by 16, ORed with (2 - 2).
 * The second dword is cc_blend_state_offset with bit 0 set.
 * NOTE(review): bit 0 is presumably a "pointer changed/valid" flag --
 * confirm against the PRM. Any batch begin/advance calls are not visible
 * in this listing.
 */
86 gen7_blorp_emit_blend_state_pointer(struct brw_context
*brw
,
87 uint32_t cc_blend_state_offset
)
90 OUT_BATCH(_3DSTATE_BLEND_STATE_POINTERS
<< 16 | (2 - 2));
91 OUT_BATCH(cc_blend_state_offset
| 1);
96 /* 3DSTATE_CC_STATE_POINTERS */
/* Emits a two-dword 3DSTATE_CC_STATE_POINTERS packet.
 *
 * The first dword is the opcode shifted left by 16, ORed with (2 - 2).
 * The second dword is cc_state_offset with bit 0 set.
 * NOTE(review): bit 0 is presumably a "pointer changed/valid" flag --
 * confirm against the PRM.
 */
98 gen7_blorp_emit_cc_state_pointer(struct brw_context
*brw
,
99 uint32_t cc_state_offset
)
102 OUT_BATCH(_3DSTATE_CC_STATE_POINTERS
<< 16 | (2 - 2));
103 OUT_BATCH(cc_state_offset
| 1);
/* Builds a CC viewport with depth range [0.0, 1.0] and points the hardware
 * at it.
 *
 * A struct brw_cc_viewport is obtained from brw_state_batch() with the
 * AUB_TRACE_CC_VP_STATE tag; its min_depth and max_depth are set to 0.0 and
 * 1.0. A 3DSTATE_VIEWPORT_STATE_POINTERS_CC packet is then emitted whose
 * second dword is cc_vp_offset.
 * NOTE(review): the remaining brw_state_batch() arguments are not visible
 * in this listing; presumably cc_vp_offset is filled in by that call --
 * confirm.
 */
108 gen7_blorp_emit_cc_viewport(struct brw_context
*brw
)
110 struct brw_cc_viewport
*ccv
;
111 uint32_t cc_vp_offset
;
113 ccv
= (struct brw_cc_viewport
*)brw_state_batch(brw
, AUB_TRACE_CC_VP_STATE
,
116 ccv
->min_depth
= 0.0;
117 ccv
->max_depth
= 1.0;
120 OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS_CC
<< 16 | (2 - 2));
121 OUT_BATCH(cc_vp_offset
);
126 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS
128 * The offset is relative to CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
/* Emits a two-dword 3DSTATE_DEPTH_STENCIL_STATE_POINTERS packet.
 *
 * The first dword is the opcode shifted left by 16, ORed with (2 - 2).
 * The second dword is depthstencil_offset with bit 0 set.
 * NOTE(review): bit 0 is presumably a "pointer changed/valid" flag --
 * confirm against the PRM.
 */
131 gen7_blorp_emit_depth_stencil_state_pointers(struct brw_context
*brw
,
132 uint32_t depthstencil_offset
)
135 OUT_BATCH(_3DSTATE_DEPTH_STENCIL_STATE_POINTERS
<< 16 | (2 - 2));
136 OUT_BATCH(depthstencil_offset
| 1);
141 /* Hardware seems to try to fetch the constants even though the corresponding
142 * stage gets disabled. Therefore make sure the settings for the constant
/* Emits a constant-state packet for the stage selected by `opcode`.
 *
 * The only visible dword is the header: opcode shifted left by 16, ORed
 * with (7 - 2).
 * NOTE(review): the rest of the parameter list (including the declaration
 * of `opcode`) and the remaining dwords are not visible in this listing;
 * presumably they are all zero -- confirm.
 */
146 gen7_blorp_disable_constant_state(struct brw_context
*brw
,
150 OUT_BATCH(opcode
<< 16 | (7 - 2));
162 * Disable vertex shader.
/* Emits a 3DSTATE_VS packet whose header is the opcode shifted left by 16,
 * ORed with (6 - 2).
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
165 gen7_blorp_emit_vs_disable(struct brw_context
*brw
)
168 OUT_BATCH(_3DSTATE_VS
<< 16 | (6 - 2));
180 * Disable the hull shader.
/* Emits a 3DSTATE_HS packet whose header is the opcode shifted left by 16,
 * ORed with (7 - 2).
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
183 gen7_blorp_emit_hs_disable(struct brw_context
*brw
)
186 OUT_BATCH(_3DSTATE_HS
<< 16 | (7 - 2));
199 * Disable the tessellation engine.
/* Emits a 3DSTATE_TE packet whose header is the opcode shifted left by 16,
 * ORed with (4 - 2).
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
202 gen7_blorp_emit_te_disable(struct brw_context
*brw
)
205 OUT_BATCH(_3DSTATE_TE
<< 16 | (4 - 2));
215 * Disable the domain shader.
/* Emits a 3DSTATE_DS packet whose header is the opcode shifted left by 16,
 * ORed with (6 - 2).
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
218 gen7_blorp_emit_ds_disable(struct brw_context
*brw
)
221 OUT_BATCH(_3DSTATE_DS
<< 16 | (6 - 2));
232 * Disable the geometry shader.
/* Emits a 3DSTATE_GS packet and records the GS as disabled.
 *
 * When brw->gen < 8, the part is not Haswell, brw->gt == 2 and
 * brw->gs.enabled is currently set, gen7_emit_cs_stall_flush(brw) is issued
 * first (see the BSpec note quoted in the body). The 3DSTATE_GS header is
 * the opcode shifted left by 16, ORed with (7 - 2). Finally
 * brw->gs.enabled is cleared so later code sees the new state.
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
235 gen7_blorp_emit_gs_disable(struct brw_context
*brw
)
238 * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
239 * Geometry > Geometry Shader > State:
241 * "Note: Because of corruption in IVB:GT2, software needs to flush the
242 * whole fixed function pipeline when the GS enable changes value in
245 * The hardware architects have clarified that in this context "flush the
246 * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
249 if (brw
->gen
< 8 && !brw
->is_haswell
&& brw
->gt
== 2 && brw
->gs
.enabled
)
250 gen7_emit_cs_stall_flush(brw
);
253 OUT_BATCH(_3DSTATE_GS
<< 16 | (7 - 2));
261 brw
->gs
.enabled
= false;
/* Emits a 3DSTATE_STREAMOUT packet whose header is the opcode shifted left
 * by 16, ORed with (3 - 2).
 * NOTE(review): the dwords that follow the header are not visible in this
 * listing.
 */
269 gen7_blorp_emit_streamout_disable(struct brw_context
*brw
)
272 OUT_BATCH(_3DSTATE_STREAMOUT
<< 16 | (3 - 2));
/* Emits the 3DSTATE_SF packet used for blorp rectangle rendering.
 *
 * Visible dwords:
 *   - header: opcode shifted left by 16, ORed with (7 - 2);
 *   - params->depth_format shifted by
 *     GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT;
 *   - GEN6_SF_MSRAST_ON_PATTERN when params->dst.surf.samples > 1,
 *     otherwise 0.
 * NOTE(review): the remaining dwords of the packet are not visible in this
 * listing.
 */
280 gen7_blorp_emit_sf_config(struct brw_context
*brw
,
281 const struct brw_blorp_params
*params
)
285 * Disable ViewportTransformEnable (dw1.1)
287 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
288 * Primitives Overview":
289 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
290 * use of screen- space coordinates).
292 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw1.6:5)
293 * and BackFaceFillMode (dw1.4:3) to SOLID(0).
295 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
296 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
297 * SOLID: Any triangle or rectangle object found to be front-facing
298 * is rendered as a solid object. This setting is required when
299 * (rendering rectangle (RECTLIST) objects.
303 OUT_BATCH(_3DSTATE_SF
<< 16 | (7 - 2));
304 OUT_BATCH(params
->depth_format
<<
305 GEN7_SF_DEPTH_BUFFER_SURFACE_FORMAT_SHIFT
);
306 OUT_BATCH(params
->dst
.surf
.samples
> 1 ? GEN6_SF_MSRAST_ON_PATTERN
: 0);
/* Body fragment that emits a 3DSTATE_SBE packet.
 * NOTE(review): the header of the enclosing function is not visible in
 * this listing.
 *
 * num_varyings is params->wm_prog_data->num_varying_inputs, or 0 when
 * params->wm_prog_data is NULL. urb_read_length comes from
 * brw_blorp_get_urb_length(params->wm_prog_data).
 *
 * Visible dwords:
 *   - header: opcode shifted left by 16, ORed with (14 - 2);
 *   - num_varyings, urb_read_length and BRW_SF_URB_ENTRY_READ_OFFSET, each
 *     shifted into its GEN7_SBE_* field;
 *   - a loop of 9 iterations (its body is not visible here);
 *   - params->wm_prog_data->flat_inputs, or 0 when there is no program
 *     data.
 */
316 const unsigned num_varyings
=
317 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
318 const unsigned urb_read_length
=
319 brw_blorp_get_urb_length(params
->wm_prog_data
);
322 OUT_BATCH(_3DSTATE_SBE
<< 16 | (14 - 2));
324 /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the
325 * vertex data coming from vertex fetcher is taken as unmodified
326 * (i.e., passed through). Vertex shader state is disabled and vertex
327 * fetcher builds complete vertex entries including VUE header.
328 * This is for unknown reason really needed to be disabled when more
329 * than one vec4 worth of vertex attributes are needed.
331 OUT_BATCH(num_varyings
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
332 urb_read_length
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
333 BRW_SF_URB_ENTRY_READ_OFFSET
<<
334 GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT
);
335 for (int i
= 0; i
< 9; ++i
)
337 OUT_BATCH(params
->wm_prog_data
? params
->wm_prog_data
->flat_inputs
: 0);
346 * Disable thread dispatch (dw5.19) and enable the HiZ op.
/* Builds and emits the 3DSTATE_WM packet for a blorp operation.
 *
 * dw1 is assembled from:
 *   - params->hiz_op: GEN6_HIZ_OP_DEPTH_CLEAR, _DEPTH_RESOLVE and
 *     _HIZ_RESOLVE map to GEN7_WM_DEPTH_CLEAR, GEN7_WM_DEPTH_RESOLVE and
 *     GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE; GEN6_HIZ_OP_NONE adds nothing
 *     visible; unreachable("not reached") covers anything else.
 *     NOTE(review): the statements separating the cases are not visible in
 *     this listing; presumably each case ends in a break -- confirm.
 *   - GEN7_WM_LINE_AA_WIDTH_1_0, GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5 and a
 *     zero barycentric interpolation mode;
 *   - GEN7_WM_DISPATCH_ENABLE when params->wm_prog_data is non-NULL;
 *   - GEN7_WM_KILL_ENABLE (NOTE(review): any guard on this line is not
 *     visible here);
 *   - GEN7_WM_MSRAST_ON_PATTERN when params->dst.surf.samples > 1,
 *     GEN7_WM_MSRAST_OFF_PIXEL in the other visible branch.
 *
 * dw2 carries the multisample dispatch mode: PERSAMPLE when prog_data is
 * non-NULL and prog_data->persample_msaa_dispatch is set, with PERPIXEL
 * and a second PERSAMPLE assignment in branches whose else keywords are
 * not visible in this listing.
 *
 * The visible packet header is the opcode shifted left by 16, ORed with
 * (3 - 2).
 */
349 gen7_blorp_emit_wm_config(struct brw_context
*brw
,
350 const struct brw_blorp_params
*params
)
352 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
353 uint32_t dw1
= 0, dw2
= 0;
355 switch (params
->hiz_op
) {
356 case GEN6_HIZ_OP_DEPTH_CLEAR
:
357 dw1
|= GEN7_WM_DEPTH_CLEAR
;
359 case GEN6_HIZ_OP_DEPTH_RESOLVE
:
360 dw1
|= GEN7_WM_DEPTH_RESOLVE
;
362 case GEN6_HIZ_OP_HIZ_RESOLVE
:
363 dw1
|= GEN7_WM_HIERARCHICAL_DEPTH_RESOLVE
;
365 case GEN6_HIZ_OP_NONE
:
368 unreachable("not reached");
370 dw1
|= GEN7_WM_LINE_AA_WIDTH_1_0
;
371 dw1
|= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5
;
372 dw1
|= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT
; /* No interp */
374 if (params
->wm_prog_data
)
375 dw1
|= GEN7_WM_DISPATCH_ENABLE
; /* We are rendering */
378 dw1
|= GEN7_WM_KILL_ENABLE
; /* TODO: temporarily smash on */
380 if (params
->dst
.surf
.samples
> 1) {
381 dw1
|= GEN7_WM_MSRAST_ON_PATTERN
;
382 if (prog_data
&& prog_data
->persample_msaa_dispatch
)
383 dw2
|= GEN7_WM_MSDISPMODE_PERSAMPLE
;
385 dw2
|= GEN7_WM_MSDISPMODE_PERPIXEL
;
387 dw1
|= GEN7_WM_MSRAST_OFF_PIXEL
;
388 dw2
|= GEN7_WM_MSDISPMODE_PERSAMPLE
;
392 OUT_BATCH(_3DSTATE_WM
<< 16 | (3 - 2));
402 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
403 * that, thread dispatch info must still be specified.
404 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
405 * valid range for this field is [0x3, 0x2f].
406 * - A dispatch mode must be given; that is, at least one of the
407 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
408 * discovered through simulator error messages.
/* Builds and emits the 3DSTATE_PS packet for a blorp operation.
 *
 * max_threads_shift is HSW_PS_MAX_THREADS_SHIFT on Haswell and
 * IVB_PS_MAX_THREADS_SHIFT otherwise. dw2, dw4, dw5, ksp0 and ksp2 all
 * start at 0, then:
 *   - dw4 gets (brw->max_wm_threads - 1) shifted by max_threads_shift;
 *   - dw4 gets SET_FIELD(1, HSW_PS_SAMPLE_MASK) (NOTE(review): any guard on
 *     this line is not visible in this listing; the field name suggests it
 *     is Haswell-only -- confirm);
 *   - when params->wm_prog_data is non-NULL: dw5 gets the two dispatch
 *     start GRFs, ksp0 is params->wm_prog_kernel, ksp2 is that value plus
 *     ksp_offset_2, and dw4 gets the 8-/16-pixel dispatch enables and
 *     GEN7_PS_ATTRIBUTE_ENABLE according to dispatch_8, dispatch_16 and a
 *     nonzero num_varying_inputs;
 *   - in the no-program case described by the inline comment, dw4 gets
 *     GEN7_PS_16_DISPATCH_ENABLE (the else keyword is not visible here);
 *   - dw2 gets a sampler count field of 1 (NOTE(review): any guard on this
 *     line is not visible here);
 *   - dw4 is ORed with params->fast_clear_op.
 *
 * The visible packet dwords are the header (opcode shifted left by 16,
 * ORed with (8 - 2)) and a zero kernel 1 pointer; the others are not
 * visible in this listing.
 */
411 gen7_blorp_emit_ps_config(struct brw_context
*brw
,
412 const struct brw_blorp_params
*params
)
414 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
415 uint32_t dw2
, dw4
, dw5
, ksp0
, ksp2
;
416 const int max_threads_shift
= brw
->is_haswell
?
417 HSW_PS_MAX_THREADS_SHIFT
: IVB_PS_MAX_THREADS_SHIFT
;
419 dw2
= dw4
= dw5
= ksp0
= ksp2
= 0;
420 dw4
|= (brw
->max_wm_threads
- 1) << max_threads_shift
;
423 dw4
|= SET_FIELD(1, HSW_PS_SAMPLE_MASK
); /* 1 sample for now */
424 if (params
->wm_prog_data
) {
425 dw5
|= prog_data
->first_curbe_grf_0
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_0
;
426 dw5
|= prog_data
->first_curbe_grf_2
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_2
;
428 ksp0
= params
->wm_prog_kernel
;
429 ksp2
= params
->wm_prog_kernel
+ params
->wm_prog_data
->ksp_offset_2
;
431 if (params
->wm_prog_data
->dispatch_8
)
432 dw4
|= GEN7_PS_8_DISPATCH_ENABLE
;
433 if (params
->wm_prog_data
->dispatch_16
)
434 dw4
|= GEN7_PS_16_DISPATCH_ENABLE
;
435 if (params
->wm_prog_data
->num_varying_inputs
)
436 dw4
|= GEN7_PS_ATTRIBUTE_ENABLE
;
438 /* The hardware gets angry if we don't enable at least one dispatch
439 * mode, so just enable 16-pixel dispatch if we don't have a program.
441 dw4
|= GEN7_PS_16_DISPATCH_ENABLE
;
445 dw2
|= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT
; /* Up to 4 samplers */
447 dw4
|= params
->fast_clear_op
;
450 OUT_BATCH(_3DSTATE_PS
<< 16 | (8 - 2));
456 OUT_BATCH(0); /* kernel 1 pointer */
/* Emits a two-dword 3DSTATE_BINDING_TABLE_POINTERS_PS packet.
 *
 * The first dword is the opcode shifted left by 16, ORed with (2 - 2); the
 * second is wm_bind_bo_offset, passed through unmodified.
 */
463 gen7_blorp_emit_binding_table_pointers_ps(struct brw_context
*brw
,
464 uint32_t wm_bind_bo_offset
)
467 OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS
<< 16 | (2 - 2));
468 OUT_BATCH(wm_bind_bo_offset
);
/* Emits a two-dword 3DSTATE_SAMPLER_STATE_POINTERS_PS packet.
 *
 * The first dword is the opcode shifted left by 16, ORed with (2 - 2); the
 * second is sampler_offset, passed through unmodified.
 */
474 gen7_blorp_emit_sampler_state_pointers_ps(struct brw_context
*brw
,
475 uint32_t sampler_offset
)
478 OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS
<< 16 | (2 - 2));
479 OUT_BATCH(sampler_offset
);
/* Programs the depth, hierarchical-depth and stencil buffer packets for a
 * blorp operation that has a depth miptree (params->depth.mt is
 * dereferenced unconditionally here).
 *
 * Surface type: GL_TEXTURE_CUBE_MAP and GL_TEXTURE_CUBE_MAP_ARRAY targets
 * use BRW_SURFACE_2D (see the inline comment); the other visible
 * assignment takes translate_tex_target(gl_target).
 *
 * 3DSTATE_DEPTH_BUFFER, preceded by brw_emit_depth_stall_flushes(brw):
 *   - header: opcode shifted left by 16, ORed with (7 - 2);
 *   - (row_pitch - 1), depth_format << 18, bit 22 (hiz enable) and bit 28
 *     (depth write), plus bits on a line not visible in this listing;
 *   - a relocation to params->depth.mt->bo in the render domain;
 *   - (width - 1) << 4 | (height - 1) << 18 | base_level;
 *   - (depth - 1) << 21 | base_array_layer << 10, plus bits not visible;
 *   - (depth - 1) << 21,
 *   where depth is the larger of logical_level0_px.depth and
 *   logical_level0_px.array_len.
 *
 * 3DSTATE_HIER_DEPTH_BUFFER: header with (3 - 2), then
 * (mocs << 25) | (hiz_buf->pitch - 1), then a relocation to hiz_buf->bo.
 * mocs is GEN7_MOCS_L3.
 *
 * 3DSTATE_STENCIL_BUFFER: only the header with (3 - 2) is visible.
 */
484 gen7_blorp_emit_depth_stencil_config(struct brw_context
*brw
,
485 const struct brw_blorp_params
*params
)
487 const uint8_t mocs
= GEN7_MOCS_L3
;
489 GLenum gl_target
= params
->depth
.mt
->target
;
492 case GL_TEXTURE_CUBE_MAP_ARRAY
:
493 case GL_TEXTURE_CUBE_MAP
:
494 /* The PRM claims that we should use BRW_SURFACE_CUBE for this
495 * situation, but experiments show that gl_Layer doesn't work when we do
496 * this. So we use BRW_SURFACE_2D, since for rendering purposes this is
499 surftype
= BRW_SURFACE_2D
;
502 surftype
= translate_tex_target(gl_target
);
506 /* 3DSTATE_DEPTH_BUFFER */
508 brw_emit_depth_stall_flushes(brw
);
510 unsigned depth
= MAX2(params
->depth
.surf
.logical_level0_px
.depth
,
511 params
->depth
.surf
.logical_level0_px
.array_len
);
514 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (7 - 2));
515 OUT_BATCH((params
->depth
.surf
.row_pitch
- 1) |
516 params
->depth_format
<< 18 |
517 1 << 22 | /* hiz enable */
518 1 << 28 | /* depth write */
520 OUT_RELOC(params
->depth
.mt
->bo
,
521 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
523 OUT_BATCH((params
->depth
.surf
.logical_level0_px
.width
- 1) << 4 |
524 (params
->depth
.surf
.logical_level0_px
.height
- 1) << 18 |
525 params
->depth
.view
.base_level
);
526 OUT_BATCH(((depth
- 1) << 21) |
527 (params
->depth
.view
.base_array_layer
<< 10) |
530 OUT_BATCH((depth
- 1) << 21);
534 /* 3DSTATE_HIER_DEPTH_BUFFER */
536 struct intel_miptree_aux_buffer
*hiz_buf
= params
->depth
.mt
->hiz_buf
;
539 OUT_BATCH((GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16) | (3 - 2));
540 OUT_BATCH((mocs
<< 25) |
541 (hiz_buf
->pitch
- 1));
542 OUT_RELOC(hiz_buf
->bo
,
543 I915_GEM_DOMAIN_RENDER
, I915_GEM_DOMAIN_RENDER
,
548 /* 3DSTATE_STENCIL_BUFFER */
551 OUT_BATCH((GEN7_3DSTATE_STENCIL_BUFFER
<< 16) | (3 - 2));
/* Programs the depth, hierarchical-depth and stencil buffer packets for
 * the case where no depth buffer is bound.
 *
 * After brw_emit_depth_stall_flushes(brw), a 3DSTATE_DEPTH_BUFFER packet
 * is emitted whose second dword selects BRW_DEPTHFORMAT_D32_FLOAT (shifted
 * by 18) with surface type BRW_SURFACE_NULL (shifted by 29). Headers for
 * 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER follow, each with
 * (3 - 2).
 * NOTE(review): the remaining dwords of all three packets are not visible
 * in this listing.
 */
560 gen7_blorp_emit_depth_disable(struct brw_context
*brw
)
562 brw_emit_depth_stall_flushes(brw
);
565 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (7 - 2));
566 OUT_BATCH(BRW_DEPTHFORMAT_D32_FLOAT
<< 18 | (BRW_SURFACE_NULL
<< 29));
575 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16 | (3 - 2));
581 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER
<< 16 | (3 - 2));
588 /* 3DSTATE_CLEAR_PARAMS
590 * From the Ivybridge PRM, Volume 2 Part 1, Section 11.5.5.4
591 * 3DSTATE_CLEAR_PARAMS:
592 * 3DSTATE_CLEAR_PARAMS must always be programmed in the along
593 * with the other Depth/Stencil state commands(i.e. 3DSTATE_DEPTH_BUFFER,
594 * 3DSTATE_STENCIL_BUFFER, or 3DSTATE_HIER_DEPTH_BUFFER).
/* Emits a three-dword 3DSTATE_CLEAR_PARAMS packet.
 *
 * The header is the opcode shifted left by 16, ORed with (3 - 2). The
 * second dword is params->depth.mt->depth_clear_value, or 0 when there is
 * no depth miptree. The third dword is GEN7_DEPTH_CLEAR_VALID.
 */
597 gen7_blorp_emit_clear_params(struct brw_context
*brw
,
598 const struct brw_blorp_params
*params
)
601 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS
<< 16 | (3 - 2));
602 OUT_BATCH(params
->depth
.mt
? params
->depth
.mt
->depth_clear_value
: 0);
603 OUT_BATCH(GEN7_DEPTH_CLEAR_VALID
);
/* Emits the 3DPRIMITIVE packet that draws the blorp rectangle.
 *
 * Visible dwords:
 *   - header: CMD_3D_PRIM shifted left by 16, ORed with (7 - 2);
 *   - GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL ORed with bits on a line
 *     not visible in this listing;
 *   - a vertex count of 3 per instance;
 *   - params->num_layers as the instance count.
 * NOTE(review): the remaining dwords are not visible in this listing.
 */
610 gen7_blorp_emit_primitive(struct brw_context
*brw
,
611 const struct brw_blorp_params
*params
)
614 OUT_BATCH(CMD_3D_PRIM
<< 16 | (7 - 2));
615 OUT_BATCH(GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL
|
617 OUT_BATCH(3); /* vertex count per instance */
619 OUT_BATCH(params
->num_layers
); /* instance count */
627 * \copydoc gen6_blorp_exec()
/* Emits the complete gen7 state sequence and draw call for one blorp
 * operation described by params.
 *
 * Visible order of work:
 *   1. state base address, multisample state and sample mask (the mask is
 *      (1 << samples) - 1 for multisampled destinations, otherwise 1);
 *   2. vertices and URB configuration;
 *   3. when a WM program is present: blend and CC state plus their
 *      pointers;
 *   4. constant state for VS, HS, DS, GS and PS;
 *   5. depth/stencil state and its pointer; hardware binding tables are
 *      disabled when brw->use_resource_streamer is set;
 *   6. when a WM program is present: surface states for the destination
 *      (render target) and, if params->src.mt is set, the source texture,
 *      followed by the binding table;
 *   7. VS, HS, TE, DS, GS and streamout disable, clip disable, SF and WM
 *      configuration;
 *   8. PS binding table and sampler state pointers, then PS configuration
 *      and the CC viewport;
 *   9. depth buffer configuration or depth disable, clear params, drawing
 *      rectangle, and finally the primitive.
 *
 * The surface-state calls take the address of params->dst and params->src.
 *
 * NOTE(review): several lines of this definition (braces, else keywords,
 * and possibly the assignment of wm_bind_bo_offset from
 * gen6_blorp_emit_binding_table) are not visible in this listing; the
 * visible statements are kept verbatim.
 */
630 gen7_blorp_exec(struct brw_context
*brw
,
631 const struct brw_blorp_params
*params
)
636 uint32_t cc_blend_state_offset
= 0;
637 uint32_t cc_state_offset
= 0;
638 uint32_t depthstencil_offset
;
639 uint32_t wm_bind_bo_offset
= 0;
641 brw_upload_state_base_address(brw
);
643 gen6_emit_3dstate_multisample(brw
, params
->dst
.surf
.samples
);
644 gen6_emit_3dstate_sample_mask(brw
,
645 params
->dst
.surf
.samples
> 1 ?
646 (1 << params
->dst
.surf
.samples
) - 1 : 1);
647 gen6_blorp_emit_vertices(brw
, params
);
648 gen7_blorp_emit_urb_config(brw
, params
);
649 if (params
->wm_prog_data
) {
650 cc_blend_state_offset
= gen6_blorp_emit_blend_state(brw
, params
);
651 cc_state_offset
= gen6_blorp_emit_cc_state(brw
);
652 gen7_blorp_emit_blend_state_pointer(brw
, cc_blend_state_offset
);
653 gen7_blorp_emit_cc_state_pointer(brw
, cc_state_offset
);
656 gen7_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_VS
);
657 gen7_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_HS
);
658 gen7_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_DS
);
659 gen7_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_GS
);
660 gen7_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_PS
);
662 depthstencil_offset
= gen6_blorp_emit_depth_stencil_state(brw
, params
);
663 gen7_blorp_emit_depth_stencil_state_pointers(brw
, depthstencil_offset
);
664 if (brw
->use_resource_streamer
)
665 gen7_disable_hw_binding_tables(brw
);
666 if (params
->wm_prog_data
) {
667 uint32_t wm_surf_offset_renderbuffer
;
668 uint32_t wm_surf_offset_texture
= 0;
670 intel_miptree_used_for_rendering(params
->dst
.mt
);
671 wm_surf_offset_renderbuffer
=
672 brw_blorp_emit_surface_state(brw
, &params
->dst
,
673 I915_GEM_DOMAIN_RENDER
,
674 I915_GEM_DOMAIN_RENDER
,
675 true /* is_render_target */);
676 if (params
->src
.mt
) {
677 wm_surf_offset_texture
=
678 brw_blorp_emit_surface_state(brw
, &params
->src
,
679 I915_GEM_DOMAIN_SAMPLER
, 0,
680 false /* is_render_target */);
683 gen6_blorp_emit_binding_table(brw
,
684 wm_surf_offset_renderbuffer
,
685 wm_surf_offset_texture
);
687 gen7_blorp_emit_vs_disable(brw
);
688 gen7_blorp_emit_hs_disable(brw
);
689 gen7_blorp_emit_te_disable(brw
);
690 gen7_blorp_emit_ds_disable(brw
);
691 gen7_blorp_emit_gs_disable(brw
);
692 gen7_blorp_emit_streamout_disable(brw
);
693 gen6_blorp_emit_clip_disable(brw
);
694 gen7_blorp_emit_sf_config(brw
, params
);
695 gen7_blorp_emit_wm_config(brw
, params
);
696 if (params
->wm_prog_data
)
697 gen7_blorp_emit_binding_table_pointers_ps(brw
, wm_bind_bo_offset
);
699 if (params
->src
.mt
) {
700 const uint32_t sampler_offset
=
701 gen6_blorp_emit_sampler_state(brw
, BRW_MAPFILTER_LINEAR
, 0, true);
702 gen7_blorp_emit_sampler_state_pointers_ps(brw
, sampler_offset
);
705 gen7_blorp_emit_ps_config(brw
, params
);
706 gen7_blorp_emit_cc_viewport(brw
);
708 if (params
->depth
.mt
)
709 gen7_blorp_emit_depth_stencil_config(brw
, params
);
711 gen7_blorp_emit_depth_disable(brw
);
712 gen7_blorp_emit_clear_params(brw
, params
);
713 gen6_blorp_emit_drawing_rectangle(brw
, params
);
714 gen7_blorp_emit_primitive(brw
, params
);