2 * Copyright © 2016 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "intel_batchbuffer.h"
27 #include "intel_fbo.h"
28 #include "intel_mipmap_tree.h"
30 #include "brw_context.h"
31 #include "brw_defines.h"
32 #include "brw_state.h"
34 #include "brw_blorp.h"
/*
 * gen8_blorp_emit_surface_state()
 *
 * Fills in a SURFACE_STATE for one blorp surface and returns the offset
 * (wm_surf_offset) that gen8_allocate_surface_state() reported for it.
 *
 * Parameters:
 *   brw               context; brw->gen chooses between the SKL and BDW MOCS
 *                     constants and brw->batch.bo receives the relocations.
 *   surface           supplies the miptree (surface->mt), format, width,
 *                     height, sample count and MSAA layout.
 *   read_domains,
 *   write_domain      passed through to drm_intel_bo_emit_reloc().
 *   is_render_target  true selects the MOCS_PTE constant, false MOCS_WB.
 *
 * DWords written, as far as this listing shows:
 *   surf[0]      2D surface type, format, vertical and horizontal alignment
 *                and tiling mode (I915_TILING_Y when map_stencil_as_y_tiled
 *                is set, otherwise mt->tiling).
 *   surf[1]      MOCS field ORed with mt->qpitch >> 2.
 *   surf[2]      width - 1 and height - 1.
 *   surf[3]      pitch_bytes - 1, where pitch_bytes starts as mt->pitch.
 *   surf[4]      MSAA bits from gen7_surface_msaa_bits().
 *   surf[5]      tile_x / 4 and tile_y / 4 (both asserted to be multiples of
 *                four), plus a mip tail start LOD of 15.
 *   surf[6]      aux qpitch, aux pitch and aux mode when mt->mcs_mt exists.
 *   surf[7]      red, green, blue and alpha channel selects, ORed in after
 *                gen8_emit_fast_clear_color().
 *   surf[8..9]   64-bit value starting with the result of
 *                brw_blorp_compute_tile_offsets().
 *   surf[10..11] 64-bit offset of the MCS buffer object, when present.
 *
 * NOTE(review): the embedded line numbers skip several lines (for example
 * 70, 81-84, 94-95 and 120). The statement controlled by the
 * map_stencil_as_y_tiled test, the second addend of the surf[8] value and
 * the target buffer argument of the final relocation are not visible here.
 * Confirm against the complete file before relying on this description.
 */
37 /* SURFACE_STATE for renderbuffer or texture surface (see
38 * brw_update_renderbuffer_surface and brw_update_texture_surface)
41 gen8_blorp_emit_surface_state(struct brw_context
*brw
,
42 const struct brw_blorp_surface_info
*surface
,
43 uint32_t read_domains
, uint32_t write_domain
,
44 bool is_render_target
)
46 uint32_t wm_surf_offset
;
47 const struct intel_mipmap_tree
*mt
= surface
->mt
;
48 const uint32_t mocs_wb
= is_render_target
?
49 (brw
->gen
>= 9 ? SKL_MOCS_PTE
: BDW_MOCS_PTE
) :
50 (brw
->gen
>= 9 ? SKL_MOCS_WB
: BDW_MOCS_WB
);
51 const uint32_t tiling
= surface
->map_stencil_as_y_tiled
52 ? I915_TILING_Y
: mt
->tiling
;
53 uint32_t tile_x
, tile_y
;
55 uint32_t *surf
= gen8_allocate_surface_state(brw
, &wm_surf_offset
, -1);
57 surf
[0] = BRW_SURFACE_2D
<< BRW_SURFACE_TYPE_SHIFT
|
58 surface
->brw_surfaceformat
<< BRW_SURFACE_FORMAT_SHIFT
|
59 gen8_vertical_alignment(brw
, mt
, BRW_SURFACE_2D
) |
60 gen8_horizontal_alignment(brw
, mt
, BRW_SURFACE_2D
) |
61 gen8_surface_tiling_mode(tiling
);
63 surf
[1] = SET_FIELD(mocs_wb
, GEN8_SURFACE_MOCS
) | mt
->qpitch
>> 2;
65 surf
[2] = SET_FIELD(surface
->width
- 1, GEN7_SURFACE_WIDTH
) |
66 SET_FIELD(surface
->height
- 1, GEN7_SURFACE_HEIGHT
);
68 uint32_t pitch_bytes
= mt
->pitch
;
69 if (surface
->map_stencil_as_y_tiled
)
71 surf
[3] = pitch_bytes
- 1;
73 surf
[4] = gen7_surface_msaa_bits(surface
->num_samples
,
74 surface
->msaa_layout
);
76 if (surface
->mt
->mcs_mt
) {
77 surf
[6] = SET_FIELD(surface
->mt
->qpitch
/ 4, GEN8_SURFACE_AUX_QPITCH
) |
78 SET_FIELD((surface
->mt
->mcs_mt
->pitch
/ 128) - 1,
79 GEN8_SURFACE_AUX_PITCH
) |
80 gen8_get_aux_mode(brw
, mt
);
85 gen8_emit_fast_clear_color(brw
, mt
, surf
);
86 surf
[7] |= SET_FIELD(HSW_SCS_RED
, GEN7_SURFACE_SCS_R
) |
87 SET_FIELD(HSW_SCS_GREEN
, GEN7_SURFACE_SCS_G
) |
88 SET_FIELD(HSW_SCS_BLUE
, GEN7_SURFACE_SCS_B
) |
89 SET_FIELD(HSW_SCS_ALPHA
, GEN7_SURFACE_SCS_A
);
92 *((uint64_t *)&surf
[8]) =
93 brw_blorp_compute_tile_offsets(surface
, &tile_x
, &tile_y
) +
96 /* Note that the low bits of these fields are missing, so there's the
97 * possibility of getting in trouble.
99 assert(tile_x
% 4 == 0);
100 assert(tile_y
% 4 == 0);
101 surf
[5] = SET_FIELD(tile_x
/ 4, BRW_SURFACE_X_OFFSET
) |
102 SET_FIELD(tile_y
/ 4, GEN8_SURFACE_Y_OFFSET
);
105 /* Disable Mip Tail by setting a large value. */
106 surf
[5] |= SET_FIELD(15, GEN9_SURFACE_MIP_TAIL_START_LOD
);
/* With an MCS miptree, DWords 10-11 receive its 64-bit buffer offset and a
 * relocation is emitted at byte offset 10 * 4 inside this surface state,
 * with a delta of 0.
 */
109 if (surface
->mt
->mcs_mt
) {
110 *((uint64_t *) &surf
[10]) = surface
->mt
->mcs_mt
->bo
->offset64
;
111 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
112 wm_surf_offset
+ 10 * 4,
113 surface
->mt
->mcs_mt
->bo
, 0,
114 read_domains
, write_domain
);
117 /* Emit relocation to surface contents */
/* The relocation sits at byte offset 8 * 4 and its delta is the part of
 * surf[8] that exceeds mt->bo->offset64.
 */
118 drm_intel_bo_emit_reloc(brw
->batch
.bo
,
119 wm_surf_offset
+ 8 * 4,
121 surf
[8] - mt
->bo
->offset64
,
122 read_domains
, write_domain
);
124 return wm_surf_offset
;
/*
 * gen8_blorp_emit_blend_state()
 *
 * Obtains blend state storage from brw_state_batch(), zeroes it and fills
 * one pair of DWords per draw buffer. Returns blend_state_offset, which
 * brw_state_batch() receives by address.
 *
 * Parameters:
 *   brw     context handed to brw_state_batch().
 *   params  supplies num_draw_buffers (assumed non-zero through assume())
 *           and the four color_write_disable flags.
 *
 * The byte count is 4 + 8 * num_draw_buffers, so entry i occupies
 * blend[1 + 2 * i] and blend[1 + 2 * i + 1].
 *
 * NOTE(review): embedded line 137, between the AUB_TRACE_BLEND_STATE and
 * blend_state_offset arguments, is missing from this listing.
 */
128 gen8_blorp_emit_blend_state(struct brw_context
*brw
,
129 const struct brw_blorp_params
*params
)
131 uint32_t blend_state_offset
;
133 assume(params
->num_draw_buffers
);
135 const unsigned size
= 4 + 8 * params
->num_draw_buffers
;
136 uint32_t *blend
= (uint32_t *)brw_state_batch(brw
, AUB_TRACE_BLEND_STATE
,
138 &blend_state_offset
);
139 memset(blend
, 0, size
);
/* The same four color_write_disable flags are applied to every draw
 * buffer; they are indexed by channel (0..3), not by the loop counter.
 */
141 for (unsigned i
= 0; i
< params
->num_draw_buffers
; ++i
) {
142 if (params
->color_write_disable
[0])
143 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_RED
;
144 if (params
->color_write_disable
[1])
145 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_GREEN
;
146 if (params
->color_write_disable
[2])
147 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_BLUE
;
148 if (params
->color_write_disable
[3])
149 blend
[1 + 2 * i
] |= GEN8_BLEND_WRITE_DISABLE_ALPHA
;
/* Second DWord of the entry: pre-blend and post-blend color clamp enables
 * plus the RTFORMAT clamp range.
 */
151 blend
[1 + 2 * i
+ 1] = GEN8_BLEND_PRE_BLEND_COLOR_CLAMP_ENABLE
|
152 GEN8_BLEND_POST_BLEND_COLOR_CLAMP_ENABLE
|
153 GEN8_BLEND_COLOR_CLAMP_RANGE_RTFORMAT
;
156 return blend_state_offset
;
/*
 * gen8_blorp_emit_vs_disable()
 *
 * Emits a _3DSTATE_VS packet whose header DWord encodes a length of 9 - 2.
 *
 * NOTE(review): only the header DWord is visible; every other line of the
 * function body is missing from this listing.
 */
161 * Disable vertex shader.
164 gen8_blorp_emit_vs_disable(struct brw_context
*brw
)
167 OUT_BATCH(_3DSTATE_VS
<< 16 | (9 - 2));
/*
 * gen8_blorp_emit_hs_disable()
 *
 * Emits a _3DSTATE_HS packet whose header DWord encodes a length of 9 - 2.
 *
 * NOTE(review): only the header DWord is visible; every other line of the
 * function body is missing from this listing.
 */
181 * Disable the hull shader.
184 gen8_blorp_emit_hs_disable(struct brw_context
*brw
)
187 OUT_BATCH(_3DSTATE_HS
<< 16 | (9 - 2));
/*
 * gen8_blorp_emit_ds_disable()
 *
 * Emits a _3DSTATE_DS packet. The packet length is 11 DWords when
 * brw->gen >= 9 and 9 DWords otherwise; the header is followed by a loop
 * that runs ds_pkt_len - 1 times.
 *
 * NOTE(review): the loop body and the end of the function are missing from
 * this listing.
 */
201 * Disable the domain shader.
204 gen8_blorp_emit_ds_disable(struct brw_context
*brw
)
206 const int ds_pkt_len
= brw
->gen
>= 9 ? 11 : 9;
207 BEGIN_BATCH(ds_pkt_len
);
208 OUT_BATCH(_3DSTATE_DS
<< 16 | (ds_pkt_len
- 2));
209 for (int i
= 0; i
< ds_pkt_len
- 1; i
++)
/*
 * gen8_blorp_emit_gs_disable()
 *
 * Emits a _3DSTATE_GS packet whose header DWord encodes a length of 10 - 2.
 *
 * NOTE(review): only the header DWord is visible; every other line of the
 * function body is missing from this listing.
 */
216 * Disable the geometry shader.
219 gen8_blorp_emit_gs_disable(struct brw_context
*brw
)
222 OUT_BATCH(_3DSTATE_GS
<< 16 | (10 - 2));
/*
 * gen8_blorp_emit_streamout_disable()
 *
 * Emits a _3DSTATE_STREAMOUT packet whose header DWord encodes a length of
 * 5 - 2.
 *
 * NOTE(review): only the header DWord is visible; every other line of the
 * function body is missing from this listing.
 */
240 gen8_blorp_emit_streamout_disable(struct brw_context
*brw
)
243 OUT_BATCH(_3DSTATE_STREAMOUT
<< 16 | (5 - 2));
/*
 * gen8_blorp_emit_raster_state()
 *
 * Emits a _3DSTATE_RASTER packet (header length 5 - 2) whose first payload
 * DWord is GEN8_RASTER_CULL_NONE.
 *
 * NOTE(review): the remaining DWords of the packet are missing from this
 * listing.
 */
252 gen8_blorp_emit_raster_state(struct brw_context
*brw
)
255 OUT_BATCH(_3DSTATE_RASTER
<< 16 | (5 - 2));
256 OUT_BATCH(GEN8_RASTER_CULL_NONE
);
/*
 * gen8_blorp_emit_sbe_state()
 *
 * Emits _3DSTATE_SBE followed by _3DSTATE_SBE_SWIZ.
 *
 * Parameters:
 *   brw     context; brw->gen == 8 gives a 4-DWord SBE packet, any other
 *           value gives 6 DWords.
 *   params  params->wm_prog_data supplies num_varying_inputs, flat_inputs
 *           and the argument of brw_blorp_get_urb_length().
 *
 * The second SBE DWord packs the number of outputs, the URB entry read
 * length, BRW_SF_URB_ENTRY_READ_OFFSET and the two FORCE bits; the third
 * carries flat_inputs. For the 6-DWord form, dw4 gets a two-bit
 * GEN9_SBE_ACTIVE_COMPONENT_XYZW field at slot 0 and one more for each
 * varying at slot i + 1.
 *
 * NOTE(review): the lines that emit dw4, the body of the eight-iteration
 * SBE_SWIZ loop and the batch begin/advance calls are missing from this
 * listing.
 */
264 gen8_blorp_emit_sbe_state(struct brw_context
*brw
,
265 const struct brw_blorp_params
*params
)
267 const unsigned num_varyings
= params
->wm_prog_data
->num_varying_inputs
;
268 const unsigned urb_read_length
=
269 brw_blorp_get_urb_length(params
->wm_prog_data
);
273 const unsigned sbe_cmd_length
= brw
->gen
== 8 ? 4 : 6;
274 BEGIN_BATCH(sbe_cmd_length
);
275 OUT_BATCH(_3DSTATE_SBE
<< 16 | (sbe_cmd_length
- 2));
277 /* There is no need for swizzling (GEN7_SBE_SWIZZLE_ENABLE). All the
278 * vertex data coming from vertex fetcher is taken as unmodified
279 * (i.e., passed through). Vertex shader state is disabled and vertex
280 * fetcher builds complete vertex entries including VUE header.
281 * This is for unknown reason really needed to be disabled when more
282 * than one vec4 worth of vertex attributes are needed.
284 OUT_BATCH(num_varyings
<< GEN7_SBE_NUM_OUTPUTS_SHIFT
|
285 urb_read_length
<< GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT
|
286 BRW_SF_URB_ENTRY_READ_OFFSET
<<
287 GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT
|
288 GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH
|
289 GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET
);
291 OUT_BATCH(params
->wm_prog_data
->flat_inputs
);
292 if (sbe_cmd_length
>= 6) {
293 /* Fragment coordinates are always enabled. */
294 uint32_t dw4
= (GEN9_SBE_ACTIVE_COMPONENT_XYZW
<< (0 << 1));
/* Each varying takes the next two-bit slot after the fragment coordinates,
 * hence the shift by (i + 1) << 1.
 */
296 for (unsigned i
= 0; i
< num_varyings
; ++i
) {
297 dw4
|= (GEN9_SBE_ACTIVE_COMPONENT_XYZW
<< ((i
+ 1) << 1));
308 OUT_BATCH(_3DSTATE_SBE_SWIZ
<< 16 | (11 - 2));
310 /* Output DWords 1 through 8: */
311 for (int i
= 0; i
< 8; i
++) {
315 OUT_BATCH(0); /* wrapshortest enables 0-7 */
316 OUT_BATCH(0); /* wrapshortest enables 8-15 */
/*
 * gen8_blorp_emit_sf_config()
 *
 * Emits a _3DSTATE_SF packet (header length 4 - 2); one of its payload
 * DWords is GEN6_SF_LINE_AA_MODE_TRUE.
 *
 * NOTE(review): embedded lines 327-328 and 330 onward are missing, so the
 * position of that DWord within the packet cannot be confirmed from this
 * listing.
 */
322 gen8_blorp_emit_sf_config(struct brw_context
*brw
)
324 /* See gen6_blorp_emit_sf_config() */
326 OUT_BATCH(_3DSTATE_SF
<< 16 | (4 - 2));
329 OUT_BATCH(GEN6_SF_LINE_AA_MODE_TRUE
);
/*
 * gen8_blorp_emit_wm_state()
 *
 * Emits a two-DWord _3DSTATE_WM packet (header length 2 - 2). The payload
 * DWord sets the line anti-aliasing width to 1.0, the line end cap
 * anti-aliasing width to 0.5 and the upper-right point rasterization rule.
 *
 * NOTE(review): the surviving fragment of the original comment mentions
 * enabling a HiZ op, but no such bit is set in the visible payload; confirm
 * whether that comment is stale.
 */
334 * Disable thread dispatch (dw5.19) and enable the HiZ op.
337 gen8_blorp_emit_wm_state(struct brw_context
*brw
)
340 OUT_BATCH(_3DSTATE_WM
<< 16 | (2 - 2));
341 OUT_BATCH(GEN7_WM_LINE_AA_WIDTH_1_0
|
342 GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5
|
343 GEN7_WM_POINT_RASTRULE_UPPER_RIGHT
);
/*
 * gen8_blorp_emit_ps_config()
 *
 * Builds the DWords of a 12-DWord _3DSTATE_PS packet from the blorp program
 * data.
 *
 * Parameters:
 *   brw     context used by the batch macros.
 *   params  params->wm_prog_data supplies the dispatch start GRFs, the
 *           dispatch_8 and dispatch_16 flags and ksp_offset_2;
 *           params->wm_prog_kernel is the kernel start pointer;
 *           params->src.mt decides the sampler and binding table counts;
 *           params->fast_clear_op is ORed into dw6 unchanged.
 *
 * Visible field setup:
 *   dw3   vector mask enable, plus either a sampler count of 1 with two
 *         binding table entries or one binding table entry alone.
 *   dw6   8-wide and 16-wide dispatch enables, the maximum thread count
 *         ((64 - 1) or (64 - 2)), GEN7_PS_POSOFFSET_NONE and fast_clear_op.
 *   dw7   first_curbe_grf_0 and first_curbe_grf_2.
 *   ksp0  wm_prog_kernel; ksp2 adds ksp_offset_2.
 *
 * NOTE(review): the else line between the two binding table counts, the
 * condition that chooses between the two thread counts and most of the
 * OUT_BATCH sequence are missing from this listing.
 */
350 * Pixel shader dispatch is disabled above in 3DSTATE_WM, dw1.29. Despite
351 * that, thread dispatch info must still be specified.
352 * - Maximum Number of Threads (dw4.24:31) must be nonzero, as the
353 * valid range for this field is [0x3, 0x2f].
354 * - A dispatch mode must be given; that is, at least one of the
355 * "N Pixel Dispatch Enable" (N=8,16,32) fields must be set. This was
356 * discovered through simulator error messages.
359 gen8_blorp_emit_ps_config(struct brw_context
*brw
,
360 const struct brw_blorp_params
*params
)
362 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
363 uint32_t dw3
, dw5
, dw6
, dw7
, ksp0
, ksp2
;
365 dw3
= dw5
= dw6
= dw7
= ksp0
= ksp2
= 0;
366 dw3
|= GEN7_PS_VECTOR_MASK_ENABLE
;
368 if (params
->src
.mt
) {
369 dw3
|= 1 << GEN7_PS_SAMPLER_COUNT_SHIFT
; /* Up to 4 samplers */
370 dw3
|= 2 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT
; /* Two surfaces */
372 dw3
|= 1 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT
; /* One surface */
375 dw7
|= prog_data
->first_curbe_grf_0
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_0
;
376 dw7
|= prog_data
->first_curbe_grf_2
<< GEN7_PS_DISPATCH_START_GRF_SHIFT_2
;
378 if (params
->wm_prog_data
->dispatch_8
)
379 dw6
|= GEN7_PS_8_DISPATCH_ENABLE
;
380 if (params
->wm_prog_data
->dispatch_16
)
381 dw6
|= GEN7_PS_16_DISPATCH_ENABLE
;
383 ksp0
= params
->wm_prog_kernel
;
384 ksp2
= params
->wm_prog_kernel
+ params
->wm_prog_data
->ksp_offset_2
;
386 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
387 * it implicitly scales for different GT levels (which have some # of PSDs).
389 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
392 dw6
|= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT
;
394 dw6
|= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT
;
396 dw6
|= GEN7_PS_POSOFFSET_NONE
;
397 dw6
|= params
->fast_clear_op
;
400 OUT_BATCH(_3DSTATE_PS
<< 16 | (12 - 2));
408 OUT_BATCH(0); /* kernel 1 pointer */
/*
 * gen8_blorp_emit_ps_blend()
 *
 * Emits a two-DWord _3DSTATE_PS_BLEND packet (header length 2 - 2) whose
 * payload is GEN8_PS_BLEND_HAS_WRITEABLE_RT.
 *
 * NOTE(review): the batch begin and advance lines are missing from this
 * listing.
 */
416 gen8_blorp_emit_ps_blend(struct brw_context
*brw
)
419 OUT_BATCH(_3DSTATE_PS_BLEND
<< 16 | (2 - 2));
420 OUT_BATCH(GEN8_PS_BLEND_HAS_WRITEABLE_RT
);
/*
 * gen8_blorp_emit_ps_extra()
 *
 * Assembles dw1 for a two-DWord _3DSTATE_PS_EXTRA packet (header length
 * 2 - 2).
 *
 * Parameters:
 *   brw     context used by the batch macros.
 *   params  params->wm_prog_data supplies num_varying_inputs and
 *           persample_msaa_dispatch; params->dst.num_samples gates the
 *           per-sample bit.
 *
 * Visible bits: PIXEL_SHADER_VALID, KILL_ENABLE, ATTRIBUTE_ENABLE when there
 * is at least one varying input, and SHADER_IS_PER_SAMPLE when the
 * destination has more than one sample and prog_data is non-NULL with
 * persample_msaa_dispatch set.
 *
 * NOTE(review): the declaration of dw1, any conditions guarding the first
 * two bits and the OUT_BATCH that emits dw1 are missing from this listing.
 * prog_data is NULL-checked only in the per-sample test, while
 * params->wm_prog_data is dereferenced without a check just above it;
 * confirm which of the two is intended.
 */
425 gen8_blorp_emit_ps_extra(struct brw_context
*brw
,
426 const struct brw_blorp_params
*params
)
428 const struct brw_blorp_prog_data
*prog_data
= params
->wm_prog_data
;
431 dw1
|= GEN8_PSX_PIXEL_SHADER_VALID
;
434 dw1
|= GEN8_PSX_KILL_ENABLE
;
436 if (params
->wm_prog_data
->num_varying_inputs
)
437 dw1
|= GEN8_PSX_ATTRIBUTE_ENABLE
;
439 if (params
->dst
.num_samples
> 1 && prog_data
&&
440 prog_data
->persample_msaa_dispatch
)
441 dw1
|= GEN8_PSX_SHADER_IS_PER_SAMPLE
;
444 OUT_BATCH(_3DSTATE_PS_EXTRA
<< 16 | (2 - 2));
/*
 * gen8_blorp_emit_depth_disable()
 *
 * Tests brw->no_depth_or_stencil, calls brw_emit_depth_stall_flushes() and
 * then emits three packets: GEN7_3DSTATE_DEPTH_BUFFER (length 8 - 2) with a
 * D32_FLOAT format and a NULL surface type, GEN7_3DSTATE_HIER_DEPTH_BUFFER
 * (length 5 - 2) and GEN7_3DSTATE_STENCIL_BUFFER (length 5 - 2).
 *
 * NOTE(review): the statement controlled by the no_depth_or_stencil test
 * and the remaining DWords of all three packets are missing from this
 * listing.
 */
450 gen8_blorp_emit_depth_disable(struct brw_context
*brw
)
452 /* Skip repeated NULL depth/stencil emits (think 2D rendering). */
453 if (brw
->no_depth_or_stencil
)
456 brw_emit_depth_stall_flushes(brw
);
459 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER
<< 16 | (8 - 2));
460 OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT
<< 18) | (BRW_SURFACE_NULL
<< 29));
470 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER
<< 16 | (5 - 2));
478 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER
<< 16 | (5 - 2));
/*
 * gen8_blorp_emit_vf_topology()
 *
 * Emits a two-DWord _3DSTATE_VF_TOPOLOGY packet (header length 2 - 2) that
 * selects _3DPRIM_RECTLIST.
 *
 * NOTE(review): the batch begin and advance lines are missing from this
 * listing.
 */
487 gen8_blorp_emit_vf_topology(struct brw_context
*brw
)
490 OUT_BATCH(_3DSTATE_VF_TOPOLOGY
<< 16 | (2 - 2));
491 OUT_BATCH(_3DPRIM_RECTLIST
);
/*
 * gen8_blorp_emit_vf_sys_gen_vals_state()
 *
 * Emits a _3DSTATE_VF_SGVS packet whose header DWord encodes a length of
 * 2 - 2.
 *
 * NOTE(review): only the header DWord is visible; the payload DWord is
 * missing from this listing.
 */
496 gen8_blorp_emit_vf_sys_gen_vals_state(struct brw_context
*brw
)
499 OUT_BATCH(_3DSTATE_VF_SGVS
<< 16 | (2 - 2));
/*
 * gen8_blorp_emit_vf_instancing_state()
 *
 * Emits one _3DSTATE_VF_INSTANCING packet (header length 3 - 2) per vertex
 * element.
 *
 * Parameters:
 *   brw     context used by the batch macros.
 *   params  params->wm_prog_data may be NULL, in which case the varying
 *           count is taken as 0.
 *
 * The element count is 2 + num_varyings.
 *
 * NOTE(review): the payload DWords inside the loop are missing from this
 * listing.
 */
505 gen8_blorp_emit_vf_instancing_state(struct brw_context
*brw
,
506 const struct brw_blorp_params
*params
)
508 const unsigned num_varyings
=
509 params
->wm_prog_data
? params
->wm_prog_data
->num_varying_inputs
: 0;
510 const unsigned num_elems
= 2 + num_varyings
;
512 for (unsigned i
= 0; i
< num_elems
; ++i
) {
514 OUT_BATCH(_3DSTATE_VF_INSTANCING
<< 16 | (3 - 2));
/*
 * gen8_blorp_emit_vf_state()
 *
 * Emits a _3DSTATE_VF packet whose header DWord encodes a length of 2 - 2.
 *
 * NOTE(review): only the header DWord is visible; the payload DWord is
 * missing from this listing.
 */
522 gen8_blorp_emit_vf_state(struct brw_context
*brw
)
525 OUT_BATCH(_3DSTATE_VF
<< 16 | (2 - 2));
/*
 * gen8_blorp_emit_depth_stencil_state()
 *
 * Emits a _3DSTATE_WM_DEPTH_STENCIL packet. The packet is 4 DWords long
 * when brw->gen >= 9 and 3 DWords otherwise.
 *
 * NOTE(review): the payload DWords are missing from this listing, and
 * params is not referenced by any visible line.
 */
531 gen8_blorp_emit_depth_stencil_state(struct brw_context
*brw
,
532 const struct brw_blorp_params
*params
)
534 const unsigned pkt_len
= brw
->gen
>= 9 ? 4 : 3;
536 BEGIN_BATCH(pkt_len
);
537 OUT_BATCH(_3DSTATE_WM_DEPTH_STENCIL
<< 16 | (pkt_len
- 2));
/*
 * gen8_blorp_emit_disable_constant_ps()
 *
 * Emits a _3DSTATE_CONSTANT_PS packet. The length is 11 DWords when
 * brw->gen >= 8 and 7 DWords otherwise.
 *
 * NOTE(review): the payload DWords and the batch begin and advance lines
 * are missing from this listing.
 */
547 gen8_blorp_emit_disable_constant_ps(struct brw_context
*brw
)
549 const int dwords
= brw
->gen
>= 8 ? 11 : 7;
551 OUT_BATCH(_3DSTATE_CONSTANT_PS
<< 16 | (dwords
- 2));
/*
 * gen8_blorp_emit_surface_states()
 *
 * Emits the destination surface state, optionally the source texture
 * surface state, and returns the result of gen6_blorp_emit_binding_table()
 * for the two offsets.
 *
 * Parameters:
 *   brw     context; brw->vtbl.emit_texture_surface_state() is used for the
 *           source surface.
 *   params  params->dst is emitted as a render target with
 *           I915_GEM_DOMAIN_RENDER as both read and write domain;
 *           params->src is emitted only when params->src.mt is non-NULL.
 *
 * For the source surface the visible code derives:
 *   layer_divider  MAX2(num_samples, 1) for the UMS and CMS MSAA layouts,
 *                  otherwise 1.
 *   is_cube        true for cube map and cube map array targets.
 *   depth          logical_depth0, multiplied by 6 for cubes.
 *   target         GL_TEXTURE_2D_ARRAY for cubes, otherwise mt->target.
 *   layer          surface->layer / layer_divider, or 0 for 3D textures.
 *
 * wm_surf_offset_texture stays 0 when there is no source miptree.
 *
 * NOTE(review): several argument lines of the
 * emit_texture_surface_state() call are missing from this listing, so
 * depth and layer have no visible use.
 */
585 gen8_blorp_emit_surface_states(struct brw_context
*brw
,
586 const struct brw_blorp_params
*params
)
588 uint32_t wm_surf_offset_renderbuffer
;
589 uint32_t wm_surf_offset_texture
= 0;
591 intel_miptree_used_for_rendering(params
->dst
.mt
);
/* NOTE(review): the token ¶ms in this call and in the src assignment
 * further down looks like a mis-decoded &params (the &para sequence read as
 * an HTML entity); confirm against the complete file.
 */
593 wm_surf_offset_renderbuffer
=
594 gen8_blorp_emit_surface_state(brw
, ¶ms
->dst
,
595 I915_GEM_DOMAIN_RENDER
,
596 I915_GEM_DOMAIN_RENDER
,
597 true /* is_render_target */);
598 if (params
->src
.mt
) {
599 const struct brw_blorp_surface_info
*surface
= ¶ms
->src
;
600 struct intel_mipmap_tree
*mt
= surface
->mt
;
602 /* If src is a 2D multisample array texture on Gen7+ using
603 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src layer is the
604 * physical layer holding sample 0. So, for example, if mt->num_samples
605 * == 4, then logical layer n corresponds to layer == 4*n.
607 * Multisampled depth and stencil surfaces have the samples interleaved
608 * (INTEL_MSAA_LAYOUT_IMS) and therefore the layer doesn't need
611 const unsigned layer_divider
=
612 (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
613 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) ?
614 MAX2(mt
->num_samples
, 1) : 1;
616 /* Cube textures are sampled as 2D array. */
617 const bool is_cube
= mt
->target
== GL_TEXTURE_CUBE_MAP_ARRAY
||
618 mt
->target
== GL_TEXTURE_CUBE_MAP
;
619 const unsigned depth
= (is_cube
? 6 : 1) * mt
->logical_depth0
;
620 const GLenum target
= is_cube
? GL_TEXTURE_2D_ARRAY
: mt
->target
;
621 const unsigned layer
= mt
->target
!= GL_TEXTURE_3D
?
622 surface
->layer
/ layer_divider
: 0;
624 brw
->vtbl
.emit_texture_surface_state(brw
, mt
, target
,
626 surface
->level
, mt
->last_level
+ 1,
627 surface
->brw_surfaceformat
,
629 &wm_surf_offset_texture
,
633 return gen6_blorp_emit_binding_table(brw
,
634 wm_surf_offset_renderbuffer
,
635 wm_surf_offset_texture
);
/*
 * gen8_blorp_exec()
 *
 * Emits the full state and primitive sequence for one blorp operation.
 *
 * Parameters:
 *   brw     context passed to every emit helper.
 *   params  describes the operation; params->src.mt decides whether
 *           sampler state is emitted and params->dst.num_samples drives the
 *           multisample state and the sample mask.
 *
 * Visible order of work:
 *   1. State base address, CC viewport, L3 state and URB configuration.
 *   2. Blend state and CC state, each followed by its pointer packet.
 *   3. Constant PS disable, surface states and the PS binding table
 *      pointer.
 *   4. Sampler state and its pointer, only when there is a source miptree.
 *   5. Multisample state and sample mask. The mask is
 *      (1 << num_samples) - 1 for more than one sample and 1 otherwise.
 *   6. Stage disables (VS, HS, TE, DS, GS), stream output and clip disable.
 *   7. Raster, SBE, SF, PS blend, PS extra, PS config, depth/stencil and WM
 *      state.
 *   8. Depth disable, clear params, drawing rectangle, vertex fetch state,
 *      vertices and the primitive itself.
 *   9. gen8_write_pma_stall_bits(brw, 0).
 *
 * NOTE(review): embedded line 707, directly before the final
 * gen8_write_pma_stall_bits() call, is missing from this listing and may
 * carry a condition for it; confirm against the complete file.
 */
639 * \copydoc gen6_blorp_exec()
642 gen8_blorp_exec(struct brw_context
*brw
, const struct brw_blorp_params
*params
)
644 uint32_t wm_bind_bo_offset
= 0;
646 brw_upload_state_base_address(brw
);
648 gen7_blorp_emit_cc_viewport(brw
);
649 gen7_l3_state
.emit(brw
);
651 gen7_blorp_emit_urb_config(brw
, params
);
653 const uint32_t cc_blend_state_offset
=
654 gen8_blorp_emit_blend_state(brw
, params
);
655 gen7_blorp_emit_blend_state_pointer(brw
, cc_blend_state_offset
);
657 const uint32_t cc_state_offset
= gen6_blorp_emit_cc_state(brw
);
658 gen7_blorp_emit_cc_state_pointer(brw
, cc_state_offset
);
660 gen8_blorp_emit_disable_constant_ps(brw
);
661 wm_bind_bo_offset
= gen8_blorp_emit_surface_states(brw
, params
);
663 gen7_blorp_emit_binding_table_pointers_ps(brw
, wm_bind_bo_offset
);
/* Sampler state is only emitted when the operation reads from a source
 * miptree; the filter passed is BRW_MAPFILTER_LINEAR.
 */
665 if (params
->src
.mt
) {
666 const uint32_t sampler_offset
=
667 gen6_blorp_emit_sampler_state(brw
, BRW_MAPFILTER_LINEAR
, 0, true);
668 gen7_blorp_emit_sampler_state_pointers_ps(brw
, sampler_offset
);
671 gen8_emit_3dstate_multisample(brw
, params
->dst
.num_samples
);
672 gen6_emit_3dstate_sample_mask(brw
,
673 params
->dst
.num_samples
> 1 ?
674 (1 << params
->dst
.num_samples
) - 1 : 1);
676 gen8_disable_stages
.emit(brw
);
677 gen8_blorp_emit_vs_disable(brw
);
678 gen8_blorp_emit_hs_disable(brw
);
679 gen7_blorp_emit_te_disable(brw
);
680 gen8_blorp_emit_ds_disable(brw
);
681 gen8_blorp_emit_gs_disable(brw
);
683 gen8_blorp_emit_streamout_disable(brw
);
684 gen6_blorp_emit_clip_disable(brw
);
685 gen8_blorp_emit_raster_state(brw
);
686 gen8_blorp_emit_sbe_state(brw
, params
);
687 gen8_blorp_emit_sf_config(brw
);
689 gen8_blorp_emit_ps_blend(brw
);
690 gen8_blorp_emit_ps_extra(brw
, params
);
692 gen8_blorp_emit_ps_config(brw
, params
);
694 gen8_blorp_emit_depth_stencil_state(brw
, params
);
695 gen8_blorp_emit_wm_state(brw
);
697 gen8_blorp_emit_depth_disable(brw
);
698 gen7_blorp_emit_clear_params(brw
, params
);
699 gen6_blorp_emit_drawing_rectangle(brw
, params
);
700 gen8_blorp_emit_vf_topology(brw
);
701 gen8_blorp_emit_vf_sys_gen_vals_state(brw
);
702 gen6_blorp_emit_vertices(brw
, params
);
703 gen8_blorp_emit_vf_instancing_state(brw
, params
);
704 gen8_blorp_emit_vf_state(brw
);
705 gen7_blorp_emit_primitive(brw
, params
);
708 gen8_write_pma_stall_bits(brw
, 0);