2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "intel_batchbuffer.h"
27 #include "intel_fbo.h"
28 #include "intel_mipmap_tree.h"
30 #include "brw_context.h"
31 #include "brw_defines.h"
32 #include "brw_state.h"
34 #include "brw_blorp.h"
37 gen8_blorp_emit_blend_state(struct brw_context
*brw
,
38 const struct brw_blorp_params
*params
)
40 uint32_t blend_state_offset
;
42 assume(params
->num_draw_buffers
);
44 const unsigned size
= 4 + 8 * params
->num_draw_buffers
;
45 uint32_t *blend
= (uint32_t *)brw_state_batch(brw
, AUB_TRACE_BLEND_STATE
,
48 memset(blend
, 0, size
);
50 for (unsigned i
= 0; i
< params
->num_draw_buffers
; ++i
) {
51 if (params
->color_write_disable
[0])
52 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_RED
;
53 if (params
->color_write_disable
[1])
54 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_GREEN
;
55 if (params
->color_write_disable
[2])
56 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_BLUE
;
57 if (params
->color_write_disable
[3])
58 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_ALPHA
;
60 blend
[1 + 2 * i
+ 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE
|
61 GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE
|
62 GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT
;
65 return blend_state_offset
;
68 /* Hardware seems to try to fetch the constants even though the corresponding
69 * stage gets disabled. Therefore make sure the settings for the constant
73 gen8_blorp_disable_constant_state(struct brw_context
*brw
,
77 OUT_BATCH(opcode
<< 16 | (11 - 2));
93 * Disable vertex shader.
96 gen8_blorp_emit_vs_disable(struct brw_context
*brw
)
99 OUT_BATCH(_3DSTATE_VS
<< 16 | (9 - 2));
113 * Disable the hull shader.
116 gen8_blorp_emit_hs_disable(struct brw_context
*brw
)
119 OUT_BATCH(_3DSTATE_HS
<< 16 | (9 - 2));
133 * Disable the domain shader.
136 gen8_blorp_emit_ds_disable(struct brw_context
*brw
)
138 const int ds_pkt_len
= brw
->gen
>= 9 ? 11 : 9;
139 BEGIN_BATCH(ds_pkt_len
);
140 OUT_BATCH(_3DSTATE_DS
<< 16 | (ds_pkt_len
- 2));
141 for (int i
= 0; i
< ds_pkt_len
- 1; i
++)
148 * Disable the geometry shader.
151 gen8_blorp_emit_gs_disable(struct brw_context
*brw
)
154 OUT_BATCH(_3DSTATE_GS
<< 16 | (10 - 2));
172 gen8_blorp_emit_streamout_disable(struct brw_context
*brw
)
175 OUT_BATCH(_3DSTATE_STREAMOUT
<< 16 | (5 - 2));
184 gen8_blorp_emit_raster_state(struct brw_context
*brw
)
187 OUT_BATCH(_3DSTATE_RASTER
<< 16 | (5 - 2));
188 OUT_BATCH(GEN8_RASTER_CULL_NONE
);
196 gen8_blorp_emit_sbe_state(struct brw_context
*brw
,
197 const struct brw_blorp_params
*params
)
199 const unsigned num_varyings
= params
->wm_prog_data
->num_varying_inputs
;
200 const unsigned urb_read_length
=
201 brw_blorp_get_urb_length(params
->wm_prog_data
);
205 const unsigned sbe_cmd_length
= brw
->gen
== 8 ? 4 : 6;
206 BEGIN_BATCH(sbe_cmd_length
);
207 OUT_BATCH(_3DSTATE_SBE
<< 16 | (sbe_cmd_length
- 2));
209 /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the
210 * vertex data coming from vertex fetcher is taken as unmodified
211 * (i.e., passed through). Vertex shader state is disabled and vertex
212 * fetcher builds complete vertex entries including VUE header.
213 * For reasons unknown, swizzling really must stay disabled when more
214 * than one vec4 worth of vertex attributes is needed.
216 OUT_BATCH(num_varyings
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
217 urb_read_length
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
218 BRW_SF_URB_ENTRY_READ_OFFSET
<<
219 GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT
|
220 GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH
|
221 GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET
);
223 OUT_BATCH(params
->wm_prog_data
->flat_inputs
);
224 if (sbe_cmd_length
>= 6) {
225 /* Fragment coordinates are always enabled. */
226 uint32_t dw4
= (GEN9_SBE_ACTIVE_COMPONENT_XYZW
<< (0 << 1));
228 for (unsigned i
= 0; i
< num_varyings
; ++i
) {
229 dw4
|= (GEN9_SBE_ACTIVE_COMPONENT_XYZW
<< ((i
+ 1) << 1));
240 OUT_BATCH(_3DSTATE_SBE_SWIZ
<< 16 | (11 - 2));
242 /* Output DWords 1 through 8: */
243 for (int i
= 0; i
< 8; i
++) {
247 OUT_BATCH(0); /* wrapshortest enables 0-7 */
248 OUT_BATCH(0); /* wrapshortest enables 8-15 */
254 gen8_blorp_emit_sf_config(struct brw_context
*brw
)
256 /* See gen6_blorp_emit_sf_config() */
258 OUT_BATCH(_3DSTATE_SF
<< 16 | (4 - 2));
261 OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE
);
266 * Disable thread dispatch (dw5.19) and enable the HiZ op.
269 gen8_blorp_emit_wm_state(struct brw_context
*brw
)
272 OUT_BATCH(_3DSTATE_WM
<< 16 | (2 - 2));
273 OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0
|
274 GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5
|
275 GEN7_WM_POINT_RASTRULE_UPPER_RIGHT
);
282 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
283 * that, thread dispatch info must still be specified.
284 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
285 * valid range for this field is [0x3, 0x2f].
286 * - A dispatch mode must be given; that is, at least one of the
287 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
288 * discovered through simulator error messages.
291 gen8_blorp_emit_ps_config(struct brw_context
*brw
,
292 const struct brw_blorp_params
*params
)
294 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
295 uint32_t dw3
, dw5
, dw6
, dw7
, ksp0
, ksp2
;
297 dw3
= dw5
= dw6
= dw7
= ksp0
= ksp2
= 0;
298 dw3
|= GEN7_PS_VECTOR_MASK_ENABLE
;
300 if (params
->src
.mt
) {
301 dw3
|= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT
; /* Up to 4 samplers */
302 dw3
|= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT
; /* Two surfaces */
304 dw3
|= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT
; /* One surface */
307 dw7
|= prog_data
->first_curbe_grf_0
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_0
;
308 dw7
|= prog_data
->first_curbe_grf_2
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_2
;
310 if (params
->wm_prog_data
->dispatch_8
)
311 dw6
|= GEN7_PS_8_DISPATCH_ENABLE
;
312 if (params
->wm_prog_data
->dispatch_16
)
313 dw6
|= GEN7_PS_16_DISPATCH_ENABLE
;
315 ksp0
= params
->wm_prog_kernel
;
316 ksp2
= params
->wm_prog_kernel
+ params
->wm_prog_data
->ksp_offset_2
;
318 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
319 * it implicitly scales for different GT levels (which have some # of PSDs).
321 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
324 dw6
|= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT
;
326 dw6
|= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT
;
328 dw6
|= GEN7_PS_POSOFFSET_NONE
;
329 dw6
|= params
->fast_clear_op
;
332 OUT_BATCH(_3DSTATE_PS
<< 16 | (12 - 2));
340 OUT_BATCH(0); /* kernel 1 pointer */
348 gen8_blorp_emit_ps_blend(struct brw_context
*brw
)
351 OUT_BATCH(_3DSTATE_PS_BLEND
<< 16 | (2 - 2));
352 OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT
);
357 gen8_blorp_emit_ps_extra(struct brw_context
*brw
,
358 const struct brw_blorp_params
*params
)
360 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
363 dw1
|= GEN8_PSX_PIXEL_SHADER_VALID
;
366 dw1
|= GEN8_PSX_KILL_ENABLE
;
368 if (params
->wm_prog_data
->num_varying_inputs
)
369 dw1
|= GEN8_PSX_ATTRIBUTE_ENABLE
;
371 if (params
->dst
.num_samples
> 1 && prog_data
&&
372 prog_data
->persample_msaa_dispatch
)
373 dw1
|= GEN8_PSX_SHADER_IS_PER_SAMPLE
;
376 OUT_BATCH(_3DSTATE_PS_EXTRA
<< 16 | (2 - 2));
382 gen8_blorp_emit_depth_disable(struct brw_context
*brw
)
384 /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
385 if (brw
->no_depth_or_stencil
)
388 brw_emit_depth_stall_flushes(brw
);
391 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (8 - 2));
392 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT
<< 18) | (BRW_SURFACE_NULL
<< 29));
402 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16 | (5 - 2));
410 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER
<< 16 | (5 - 2));
419 gen8_blorp_emit_vf_topology(struct brw_context
*brw
)
422 OUT_BATCH(_3DSTATE_VF_TOPOLOGY
<< 16 | (2 - 2));
423 OUT_BATCH(_3DPRIM_RECTLIST
);
428 gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context
*brw
)
431 OUT_BATCH(_3DSTATE_VF_SGVS
<< 16 | (2 - 2));
437 gen8_blorp_emit_vf_instancing_state(struct brw_context
*brw
,
438 const struct brw_blorp_params
*params
)
440 const unsigned num_varyings
=
441 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
442 const unsigned num_elems
= 2 + num_varyings
;
444 for (unsigned i
= 0; i
< num_elems
; ++i
) {
446 OUT_BATCH(_3DSTATE_VF_INSTANCING
<< 16 | (3 - 2));
454 gen8_blorp_emit_vf_state(struct brw_context
*brw
)
457 OUT_BATCH(_3DSTATE_VF
<< 16 | (2 - 2));
463 gen8_blorp_emit_depth_stencil_state(struct brw_context
*brw
,
464 const struct brw_blorp_params
*params
)
466 const unsigned pkt_len
= brw
->gen
>= 9 ? 4 : 3;
468 BEGIN_BATCH(pkt_len
);
469 OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL
<< 16 | (pkt_len
- 2));
479 * Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
480 * "Shader Channel Select" enumerations (e.g. HSW_SCS_RED). The mappings are
482 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
485 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
487 * which is simply adding 4 and then taking the result modulo 8 (or ANDing with 7).
489 * We then may need to apply workarounds for textureGather hardware bugs.
492 swizzle_to_scs(GLenum swizzle
)
494 return (swizzle
+ 4) & 7;
498 gen8_blorp_emit_surface_states(struct brw_context
*brw
,
499 const struct brw_blorp_params
*params
)
501 uint32_t wm_surf_offset_renderbuffer
;
502 uint32_t wm_surf_offset_texture
= 0;
504 intel_miptree_used_for_rendering(params
->dst
.mt
);
506 wm_surf_offset_renderbuffer
=
507 brw_blorp_emit_surface_state(brw
, ¶ms
->dst
,
508 I915_GEM_DOMAIN_RENDER
,
509 I915_GEM_DOMAIN_RENDER
,
510 true /* is_render_target */);
511 if (params
->src
.mt
) {
512 const struct brw_blorp_surface_info
*surface
= ¶ms
->src
;
513 struct intel_mipmap_tree
*mt
= surface
->mt
;
515 /* If src is a 2D multisample array texture on Gen7+ using
516 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src layer is the
517 * physical layer holding sample 0. So, for example, if mt->num_samples
518 * == 4, then logical layer n corresponds to layer == 4*n.
520 * Multisampled depth and stencil surfaces have the samples interleaved
521 * (INTEL_MSAA_LAYOUT_IMS) and therefore the layer doesn't need
524 const unsigned layer_divider
=
525 (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
526 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
527 MAX2(mt
->num_samples
, 1) : 1;
529 const bool is_cube
= mt
->target
== GL_TEXTURE_CUBE_MAP_ARRAY
||
530 mt
->target
== GL_TEXTURE_CUBE_MAP
;
531 const unsigned depth
= (is_cube
? 6 : 1) * mt
->logical_depth0
;
532 const unsigned layer
= mt
->target
!= GL_TEXTURE_3D
?
533 surface
->layer
/ layer_divider
: 0;
535 struct isl_view view
= {
536 .format
= surface
->brw_surfaceformat
,
537 .base_level
= surface
->level
,
538 .levels
= mt
->last_level
- surface
->level
+ 1,
539 .base_array_layer
= layer
,
540 .array_len
= depth
- layer
,
542 swizzle_to_scs(GET_SWZ(surface
->swizzle
, 0)),
543 swizzle_to_scs(GET_SWZ(surface
->swizzle
, 1)),
544 swizzle_to_scs(GET_SWZ(surface
->swizzle
, 2)),
545 swizzle_to_scs(GET_SWZ(surface
->swizzle
, 3)),
547 .usage
= ISL_SURF_USAGE_TEXTURE_BIT
,
550 brw_emit_surface_state(brw
, mt
, &view
,
551 brw
->gen
>= 9 ? SKL_MOCS_WB
: BDW_MOCS_WB
,
552 false, &wm_surf_offset_texture
, -1,
553 I915_GEM_DOMAIN_SAMPLER
, 0);
556 return gen6_blorp_emit_binding_table(brw
,
557 wm_surf_offset_renderbuffer
,
558 wm_surf_offset_texture
);
562 * \copydoc gen6_blorp_exec()
565 gen8_blorp_exec(struct brw_context
*brw
, const struct brw_blorp_params
*params
)
567 uint32_t wm_bind_bo_offset
= 0;
569 brw_upload_state_base_address(brw
);
571 gen7_blorp_emit_cc_viewport(brw
);
572 gen7_l3_state
.emit(brw
);
574 gen7_blorp_emit_urb_config(brw
, params
);
576 const uint32_t cc_blend_state_offset
=
577 gen8_blorp_emit_blend_state(brw
, params
);
578 gen7_blorp_emit_blend_state_pointer(brw
, cc_blend_state_offset
);
580 const uint32_t cc_state_offset
= gen6_blorp_emit_cc_state(brw
);
581 gen7_blorp_emit_cc_state_pointer(brw
, cc_state_offset
);
583 gen8_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_VS
);
584 gen8_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_HS
);
585 gen8_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_DS
);
586 gen8_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_GS
);
587 gen8_blorp_disable_constant_state(brw
, _3DSTATE_CONSTANT_PS
);
589 wm_bind_bo_offset
= gen8_blorp_emit_surface_states(brw
, params
);
591 gen7_blorp_emit_binding_table_pointers_ps(brw
, wm_bind_bo_offset
);
593 if (params
->src
.mt
) {
594 const uint32_t sampler_offset
=
595 gen6_blorp_emit_sampler_state(brw
, BRW_MAPFILTER_LINEAR
, 0, true);
596 gen7_blorp_emit_sampler_state_pointers_ps(brw
, sampler_offset
);
599 gen8_emit_3dstate_multisample(brw
, params
->dst
.num_samples
);
600 gen6_emit_3dstate_sample_mask(brw
,
601 params
->dst
.num_samples
> 1 ?
602 (1 << params
->dst
.num_samples
) - 1 : 1);
604 gen8_disable_stages
.emit(brw
);
605 gen8_blorp_emit_vs_disable(brw
);
606 gen8_blorp_emit_hs_disable(brw
);
607 gen7_blorp_emit_te_disable(brw
);
608 gen8_blorp_emit_ds_disable(brw
);
609 gen8_blorp_emit_gs_disable(brw
);
611 gen8_blorp_emit_streamout_disable(brw
);
612 gen6_blorp_emit_clip_disable(brw
);
613 gen8_blorp_emit_raster_state(brw
);
614 gen8_blorp_emit_sbe_state(brw
, params
);
615 gen8_blorp_emit_sf_config(brw
);
617 gen8_blorp_emit_ps_blend(brw
);
618 gen8_blorp_emit_ps_extra(brw
, params
);
620 gen8_blorp_emit_ps_config(brw
, params
);
622 gen8_blorp_emit_depth_stencil_state(brw
, params
);
623 gen8_blorp_emit_wm_state(brw
);
625 gen8_blorp_emit_depth_disable(brw
);
626 gen7_blorp_emit_clear_params(brw
, params
);
627 gen6_blorp_emit_drawing_rectangle(brw
, params
);
628 gen8_blorp_emit_vf_topology(brw
);
629 gen8_blorp_emit_vf_sys_gen_vals_state(brw
);
630 gen6_blorp_emit_vertices(brw
, params
);
631 gen8_blorp_emit_vf_instancing_state(brw
, params
);
632 gen8_blorp_emit_vf_state(brw
);
633 gen7_blorp_emit_primitive(brw
, params
);
636 gen8_write_pma_stall_bits(brw
, 0);