intel/blorp: Rework alloc_binding_table
[mesa.git] / src / intel / blorp / blorp_genX_exec.h
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "blorp_priv.h"
25 #include "common/gen_device_info.h"
26 #include "intel_aub.h"
27
28 /**
29 * This file provides the blorp pipeline setup and execution functionality.
30 * It defines the following function:
31 *
32 * static void
33 * blorp_exec(struct blorp_context *blorp, void *batch_data,
34 * const struct blorp_params *params);
35 *
36 * It is the job of whoever includes this header to wrap this in something
37 * to get an externally visible symbol.
38 *
39 * In order for the blorp_exec function to work, the driver must provide
40 * implementations of the following static helper functions.
41 */
42
/* Driver hook: hand back space for n dwords of command stream.  The
 * blorp_emit()/blorp_emitn() macros in this file pack a command straight
 * into the returned pointer.
 */
static void *
blorp_emit_dwords(struct blorp_batch *batch,
                  unsigned n);

/* Driver hook: called from __gen_combine_address() for every address that
 * refers to a buffer.  The return value is used as the combined address.
 */
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location,
                 struct blorp_address address,
                 uint32_t delta);

/* Driver hook: allocate a piece of dynamic state.  Callers in this file
 * write the packed state through the returned pointer and program the value
 * stored in *offset into the matching state-pointer command.
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          enum aub_state_struct_type type,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset);

/* Driver hook: allocate a vertex buffer.  Callers copy their vertex data
 * through the returned pointer and use *addr as the buffer's address.
 */
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch,
                          uint32_t size,
                          struct blorp_address *addr);

/* Driver hook: allocate a binding table together with its surface states.
 * On return the caller reads *bt_offset plus one entry of surface_offsets[]
 * and surface_maps[] for each surface it fills in.
 */
static void
blorp_alloc_binding_table(struct blorp_batch *batch,
                          unsigned num_entries,
                          unsigned state_size,
                          unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t *surface_offsets,
                          void **surface_maps);

/* Driver hook: record a relocation for an address stored inside a surface
 * state.  ss_offset is passed as the surface state's offset plus the byte
 * position of the address dword.
 */
static void
blorp_surface_reloc(struct blorp_batch *batch,
                    uint32_t ss_offset,
                    struct blorp_address address,
                    uint32_t delta);

/* Driver hook: emit the URB configuration.  emit_urb_config() passes the VS
 * entry size in units of 64 bytes.
 */
static void
blorp_emit_urb_config(struct blorp_batch *batch,
                      unsigned vs_entry_size);

/* Driver hook: emit 3DSTATE_MULTISAMPLE for the given sample count. */
static void
blorp_emit_3dstate_multisample(struct blorp_batch *batch,
                               unsigned samples);

/***** BEGIN blorp_exec implementation ******/
74 /***** BEGIN blorp_exec implementation ******/
75
76 #include "genxml/gen_macros.h"
77
78 #define __gen_address_type struct blorp_address
79 #define __gen_user_data struct blorp_batch
80
81 static uint64_t
82 __gen_combine_address(struct blorp_batch *batch, void *location,
83 struct blorp_address address, uint32_t delta)
84 {
85 if (address.buffer == NULL) {
86 return address.offset + delta;
87 } else {
88 return blorp_emit_reloc(batch, location, address, delta);
89 }
90 }
91
92 #include "genxml/genX_pack.h"
93
/* Token-pasting helpers mapping a genxml command name to its generated
 * length, length bias, header initializer and pack function.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.  Used as
 *
 *    blorp_emit(batch, GENX(CMD), name) { name.Field = ...; }
 *
 * The for-loop declares 'name' initialized with the command header and
 * reserves the command's dwords in the batch.  The statement following the
 * macro runs exactly once as the loop body; the loop "increment" then packs
 * 'name' into the reserved dwords and clears _dst to end the loop.
 */
#define blorp_emit(batch, cmd, name)                              \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name),         \
        _dst = NULL)

/* Emit the header of a variable-length command that is n dwords long in
 * total and evaluate to a pointer to the dword following the header, where
 * the caller packs the array payload.
 *
 * 'n' is parenthesized at every use so that an argument containing a
 * low-precedence operator (e.g. a conditional expression) is not re-parsed
 * against the surrounding subtraction.
 */
#define blorp_emitn(batch, cmd, n) ({                    \
      uint32_t *_dw = blorp_emit_dwords(batch, (n));     \
      struct cmd template = {                            \
         _blorp_cmd_header(cmd),                         \
         .DWordLength = (n) - _blorp_cmd_length_bias(cmd), \
      };                                                 \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);       \
      _dw + 1; /* Array starts at dw[1] */               \
   })
115
116 /* 3DSTATE_URB
117 * 3DSTATE_URB_VS
118 * 3DSTATE_URB_HS
119 * 3DSTATE_URB_DS
120 * 3DSTATE_URB_GS
121 *
122 * Assign the entire URB to the VS. Even though the VS disabled, URB space
123 * is still needed because the clipper loads the VUE's from the URB. From
124 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
125 * Dword 1.15:0 "VS Number of URB Entries":
126 * This field is always used (even if VS Function Enable is DISABLED).
127 *
128 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
129 * safely ignore it because this batch contains only one draw call.
130 * Because of URB corruption caused by allocating a previous GS unit
131 * URB entry to the VS unit, software is required to send a “GS NULL
132 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
133 * plus a dummy DRAW call before any case where VS will be taking over
134 * GS URB space.
135 *
136 * If the 3DSTATE_URB_VS is emitted, than the others must be also.
137 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
138 *
139 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
140 * programmed in order for the programming of this state to be
141 * valid.
142 */
143 static void
144 emit_urb_config(struct blorp_batch *batch,
145 const struct blorp_params *params)
146 {
147 /* Once vertex fetcher has written full VUE entries with complete
148 * header the space requirement is as follows per vertex (in bytes):
149 *
150 * Header Position Program constants
151 * +--------+------------+-------------------+
152 * | 16 | 16 | n x 16 |
153 * +--------+------------+-------------------+
154 *
155 * where 'n' stands for number of varying inputs expressed as vec4s.
156 */
157 const unsigned num_varyings =
158 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
159 const unsigned total_needed = 16 + 16 + num_varyings * 16;
160
161 /* The URB size is expressed in units of 64 bytes (512 bits) */
162 const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);
163
164 blorp_emit_urb_config(batch, vs_entry_size);
165 }
166
167 static void
168 blorp_emit_vertex_data(struct blorp_batch *batch,
169 const struct blorp_params *params,
170 struct blorp_address *addr,
171 uint32_t *size)
172 {
173 const float vertices[] = {
174 /* v0 */ (float)params->x0, (float)params->y1,
175 /* v1 */ (float)params->x1, (float)params->y1,
176 /* v2 */ (float)params->x0, (float)params->y0,
177 };
178
179 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
180 memcpy(data, vertices, sizeof(vertices));
181 *size = sizeof(vertices);
182 }
183
184 static void
185 blorp_emit_input_varying_data(struct blorp_batch *batch,
186 const struct blorp_params *params,
187 struct blorp_address *addr,
188 uint32_t *size)
189 {
190 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
191 const unsigned max_num_varyings =
192 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
193 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
194
195 *size = num_varyings * vec4_size_in_bytes;
196
197 const float *const inputs_src = (const float *)&params->wm_inputs;
198 float *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
199
200 /* Walk over the attribute slots, determine if the attribute is used by
201 * the program and when necessary copy the values from the input storage to
202 * the vertex data buffer.
203 */
204 for (unsigned i = 0; i < max_num_varyings; i++) {
205 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
206
207 if (!(params->wm_prog_data->inputs_read & (1ull << attr)))
208 continue;
209
210 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
211
212 inputs += 4;
213 }
214 }
215
216 static void
217 blorp_emit_vertex_buffers(struct blorp_batch *batch,
218 const struct blorp_params *params)
219 {
220 struct GENX(VERTEX_BUFFER_STATE) vb[2];
221 memset(vb, 0, sizeof(vb));
222
223 unsigned num_buffers = 1;
224
225 uint32_t size;
226 blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
227 vb[0].VertexBufferIndex = 0;
228 vb[0].BufferPitch = 2 * sizeof(float);
229 vb[0].VertexBufferMOCS = batch->blorp->mocs.vb;
230 #if GEN_GEN >= 7
231 vb[0].AddressModifyEnable = true;
232 #endif
233 #if GEN_GEN >= 8
234 vb[0].BufferSize = size;
235 #else
236 vb[0].BufferAccessType = VERTEXDATA;
237 vb[0].EndAddress = vb[0].BufferStartingAddress;
238 vb[0].EndAddress.offset += size - 1;
239 #endif
240
241 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
242 blorp_emit_input_varying_data(batch, params,
243 &vb[1].BufferStartingAddress, &size);
244 vb[1].VertexBufferIndex = 1;
245 vb[1].BufferPitch = 0;
246 vb[1].VertexBufferMOCS = batch->blorp->mocs.vb;
247 #if GEN_GEN >= 7
248 vb[1].AddressModifyEnable = true;
249 #endif
250 #if GEN_GEN >= 8
251 vb[1].BufferSize = size;
252 #else
253 vb[1].BufferAccessType = INSTANCEDATA;
254 vb[1].EndAddress = vb[1].BufferStartingAddress;
255 vb[1].EndAddress.offset += size - 1;
256 #endif
257 num_buffers++;
258 }
259
260 const unsigned num_dwords =
261 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
262 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
263
264 for (unsigned i = 0; i < num_buffers; i++) {
265 GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
266 dw += GENX(VERTEX_BUFFER_STATE_length);
267 }
268 }
269
270 static void
271 blorp_emit_vertex_elements(struct blorp_batch *batch,
272 const struct blorp_params *params)
273 {
274 const unsigned num_varyings =
275 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
276 const unsigned num_elements = 2 + num_varyings;
277
278 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
279 memset(ve, 0, num_elements * sizeof(*ve));
280
281 /* Setup VBO for the rectangle primitive..
282 *
283 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
284 * vertices. The vertices reside in screen space with DirectX
285 * coordinates (that is, (0, 0) is the upper left corner).
286 *
287 * v2 ------ implied
288 * | |
289 * | |
290 * v0 ----- v1
291 *
292 * Since the VS is disabled, the clipper loads each VUE directly from
293 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
294 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
295 * dw0: Reserved, MBZ.
296 * dw1: Render Target Array Index. Below vertex fetcher gets programmed
297 * to assign this with primitive instance identifier which will be
298 * used for layered clears. All other renders have only one instance
299 * and therefore the value will be effectively zero.
300 * dw2: Viewport Index. The HiZ op disables viewport mapping and
301 * scissoring, so set the dword to 0.
302 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
303 * so set the dword to 0.
304 * dw4: Vertex Position X.
305 * dw5: Vertex Position Y.
306 * dw6: Vertex Position Z.
307 * dw7: Vertex Position W.
308 *
309 * dw8: Flat vertex input 0
310 * dw9: Flat vertex input 1
311 * ...
312 * dwn: Flat vertex input n - 8
313 *
314 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
315 * "Vertex URB Entry (VUE) Formats".
316 *
317 * Only vertex position X and Y are going to be variable, Z is fixed to
318 * zero and W to one. Header words dw0,2,3 are zero. There is no need to
319 * include the fixed values in the vertex buffer. Vertex fetcher can be
320 * instructed to fill vertex elements with constant values of one and zero
321 * instead of reading them from the buffer.
322 * Flat inputs are program constants that are not interpolated. Moreover
323 * their values will be the same between vertices.
324 *
325 * See the vertex element setup below.
326 */
327 ve[0].VertexBufferIndex = 0;
328 ve[0].Valid = true;
329 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
330 ve[0].SourceElementOffset = 0;
331 ve[0].Component0Control = VFCOMP_STORE_0;
332
333 /* From Gen8 onwards hardware is no more instructed to overwrite components
334 * using an element specifier. Instead one has separate 3DSTATE_VF_SGVS
335 * (System Generated Value Setup) state packet for it.
336 */
337 #if GEN_GEN >= 8
338 ve[0].Component1Control = VFCOMP_STORE_0;
339 #else
340 ve[0].Component1Control = VFCOMP_STORE_IID;
341 #endif
342 ve[0].Component2Control = VFCOMP_STORE_0;
343 ve[0].Component3Control = VFCOMP_STORE_0;
344
345 ve[1].VertexBufferIndex = 0;
346 ve[1].Valid = true;
347 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
348 ve[1].SourceElementOffset = 0;
349 ve[1].Component0Control = VFCOMP_STORE_SRC;
350 ve[1].Component1Control = VFCOMP_STORE_SRC;
351 ve[1].Component2Control = VFCOMP_STORE_0;
352 ve[1].Component3Control = VFCOMP_STORE_1_FP;
353
354 for (unsigned i = 0; i < num_varyings; ++i) {
355 ve[i + 2].VertexBufferIndex = 1;
356 ve[i + 2].Valid = true;
357 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
358 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
359 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
360 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
361 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
362 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
363 }
364
365 const unsigned num_dwords =
366 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
367 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
368
369 for (unsigned i = 0; i < num_elements; i++) {
370 GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);
371 dw += GENX(VERTEX_ELEMENT_STATE_length);
372 }
373
374 #if GEN_GEN >= 8
375 /* Overwrite Render Target Array Index (2nd dword) in the VUE header with
376 * primitive instance identifier. This is used for layered clears.
377 */
378 blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs) {
379 sgvs.InstanceIDEnable = true;
380 sgvs.InstanceIDComponentNumber = COMP_1;
381 sgvs.InstanceIDElementOffset = 0;
382 }
383
384 for (unsigned i = 0; i < num_elements; i++) {
385 blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
386 vf.VertexElementIndex = i;
387 vf.InstancingEnable = false;
388 }
389 }
390
391 blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
392 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
393 }
394 #endif
395 }
396
397 static void
398 blorp_emit_sf_config(struct blorp_batch *batch,
399 const struct blorp_params *params)
400 {
401 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
402
403 /* 3DSTATE_SF
404 *
405 * Disable ViewportTransformEnable (dw2.1)
406 *
407 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
408 * Primitives Overview":
409 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
410 * use of screen- space coordinates).
411 *
412 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
413 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
414 *
415 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
416 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
417 * SOLID: Any triangle or rectangle object found to be front-facing
418 * is rendered as a solid object. This setting is required when
419 * (rendering rectangle (RECTLIST) objects.
420 */
421
422 #if GEN_GEN >= 8
423
424 blorp_emit(batch, GENX(3DSTATE_SF), sf);
425
426 blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
427 raster.CullMode = CULLMODE_NONE;
428 }
429
430 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
431 sbe.VertexURBEntryReadOffset = 1;
432 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
433 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
434 sbe.ForceVertexURBEntryReadLength = true;
435 sbe.ForceVertexURBEntryReadOffset = true;
436 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
437
438 #if GEN_GEN >= 9
439 for (unsigned i = 0; i < 32; i++)
440 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
441 #endif
442 }
443
444 #elif GEN_GEN >= 7
445
446 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
447 sf.FrontFaceFillMode = FILL_MODE_SOLID;
448 sf.BackFaceFillMode = FILL_MODE_SOLID;
449
450 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
451 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
452
453 #if GEN_GEN == 7
454 sf.DepthBufferSurfaceFormat = params->depth_format;
455 #endif
456 }
457
458 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
459 sbe.VertexURBEntryReadOffset = 1;
460 if (prog_data) {
461 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
462 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
463 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
464 } else {
465 sbe.NumberofSFOutputAttributes = 0;
466 sbe.VertexURBEntryReadLength = 1;
467 }
468 }
469
470 #else /* GEN_GEN <= 6 */
471
472 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
473 sf.FrontFaceFillMode = FILL_MODE_SOLID;
474 sf.BackFaceFillMode = FILL_MODE_SOLID;
475
476 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
477 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
478
479 sf.VertexURBEntryReadOffset = 1;
480 if (prog_data) {
481 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
482 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
483 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
484 } else {
485 sf.NumberofSFOutputAttributes = 0;
486 sf.VertexURBEntryReadLength = 1;
487 }
488 }
489
490 #endif /* GEN_GEN */
491 }
492
493 static void
494 blorp_emit_ps_config(struct blorp_batch *batch,
495 const struct blorp_params *params)
496 {
497 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
498
499 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
500 * nonzero to prevent the GPU from hanging. While the documentation doesn't
501 * mention this explicitly, it notes that the valid range for the field is
502 * [1,39] = [2,40] threads, which excludes zero.
503 *
504 * To be safe (and to minimize extraneous code) we go ahead and fully
505 * configure the WM state whether or not there is a WM program.
506 */
507
508 #if GEN_GEN >= 8
509
510 blorp_emit(batch, GENX(3DSTATE_WM), wm);
511
512 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
513 if (params->src.addr.buffer) {
514 ps.SamplerCount = 1; /* Up to 4 samplers */
515 ps.BindingTableEntryCount = 2;
516 } else {
517 ps.BindingTableEntryCount = 1;
518 }
519
520 ps.DispatchGRFStartRegisterForConstantSetupData0 =
521 prog_data->first_curbe_grf_0;
522 ps.DispatchGRFStartRegisterForConstantSetupData2 =
523 prog_data->first_curbe_grf_2;
524
525 ps._8PixelDispatchEnable = prog_data->dispatch_8;
526 ps._16PixelDispatchEnable = prog_data->dispatch_16;
527
528 ps.KernelStartPointer0 = params->wm_prog_kernel;
529 ps.KernelStartPointer2 =
530 params->wm_prog_kernel + prog_data->ksp_offset_2;
531
532 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
533 * it implicitly scales for different GT levels (which have some # of
534 * PSDs).
535 *
536 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
537 */
538 if (GEN_GEN >= 9)
539 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
540 else
541 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
542
543 switch (params->fast_clear_op) {
544 case BLORP_FAST_CLEAR_OP_NONE:
545 break;
546 #if GEN_GEN >= 9
547 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
548 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
549 break;
550 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
551 ps.RenderTargetResolveType = RESOLVE_FULL;
552 break;
553 #else
554 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
555 ps.RenderTargetResolveEnable = true;
556 break;
557 #endif
558 case BLORP_FAST_CLEAR_OP_CLEAR:
559 ps.RenderTargetFastClearEnable = true;
560 break;
561 default:
562 unreachable("Invalid fast clear op");
563 }
564 }
565
566 blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
567 psx.PixelShaderValid = true;
568
569 if (params->src.addr.buffer)
570 psx.PixelShaderKillsPixel = true;
571
572 psx.AttributeEnable = prog_data->num_varying_inputs > 0;
573
574 if (prog_data && prog_data->persample_msaa_dispatch)
575 psx.PixelShaderIsPerSample = true;
576 }
577
578 #elif GEN_GEN >= 7
579
580 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
581 switch (params->hiz_op) {
582 case BLORP_HIZ_OP_DEPTH_CLEAR:
583 wm.DepthBufferClear = true;
584 break;
585 case BLORP_HIZ_OP_DEPTH_RESOLVE:
586 wm.DepthBufferResolveEnable = true;
587 break;
588 case BLORP_HIZ_OP_HIZ_RESOLVE:
589 wm.HierarchicalDepthBufferResolveEnable = true;
590 break;
591 case BLORP_HIZ_OP_NONE:
592 break;
593 default:
594 unreachable("not reached");
595 }
596
597 if (prog_data)
598 wm.ThreadDispatchEnable = true;
599
600 if (params->src.addr.buffer)
601 wm.PixelShaderKillPixel = true;
602
603 if (params->dst.surf.samples > 1) {
604 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
605 wm.MultisampleDispatchMode =
606 (prog_data && prog_data->persample_msaa_dispatch) ?
607 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
608 } else {
609 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
610 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
611 }
612 }
613
614 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
615 ps.MaximumNumberofThreads =
616 batch->blorp->isl_dev->info->max_wm_threads - 1;
617
618 #if GEN_IS_HASWELL
619 ps.SampleMask = 1;
620 #endif
621
622 if (prog_data) {
623 ps.DispatchGRFStartRegisterforConstantSetupData0 =
624 prog_data->first_curbe_grf_0;
625 ps.DispatchGRFStartRegisterforConstantSetupData2 =
626 prog_data->first_curbe_grf_2;
627
628 ps.KernelStartPointer0 = params->wm_prog_kernel;
629 ps.KernelStartPointer2 =
630 params->wm_prog_kernel + prog_data->ksp_offset_2;
631
632 ps._8PixelDispatchEnable = prog_data->dispatch_8;
633 ps._16PixelDispatchEnable = prog_data->dispatch_16;
634
635 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
636 } else {
637 /* Gen7 hardware gets angry if we don't enable at least one dispatch
638 * mode, so just enable 16-pixel dispatch if we don't have a program.
639 */
640 ps._16PixelDispatchEnable = true;
641 }
642
643 if (params->src.addr.buffer)
644 ps.SamplerCount = 1; /* Up to 4 samplers */
645
646 switch (params->fast_clear_op) {
647 case BLORP_FAST_CLEAR_OP_NONE:
648 break;
649 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
650 ps.RenderTargetResolveEnable = true;
651 break;
652 case BLORP_FAST_CLEAR_OP_CLEAR:
653 ps.RenderTargetFastClearEnable = true;
654 break;
655 default:
656 unreachable("Invalid fast clear op");
657 }
658 }
659
660 #else /* GEN_GEN <= 6 */
661
662 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
663 wm.MaximumNumberofThreads =
664 batch->blorp->isl_dev->info->max_wm_threads - 1;
665
666 switch (params->hiz_op) {
667 case BLORP_HIZ_OP_DEPTH_CLEAR:
668 wm.DepthBufferClear = true;
669 break;
670 case BLORP_HIZ_OP_DEPTH_RESOLVE:
671 wm.DepthBufferResolveEnable = true;
672 break;
673 case BLORP_HIZ_OP_HIZ_RESOLVE:
674 wm.HierarchicalDepthBufferResolveEnable = true;
675 break;
676 case BLORP_HIZ_OP_NONE:
677 break;
678 default:
679 unreachable("not reached");
680 }
681
682 if (prog_data) {
683 wm.ThreadDispatchEnable = true;
684
685 wm.DispatchGRFStartRegisterforConstantSetupData0 =
686 prog_data->first_curbe_grf_0;
687 wm.DispatchGRFStartRegisterforConstantSetupData2 =
688 prog_data->first_curbe_grf_2;
689
690 wm.KernelStartPointer0 = params->wm_prog_kernel;
691 wm.KernelStartPointer2 =
692 params->wm_prog_kernel + prog_data->ksp_offset_2;
693
694 wm._8PixelDispatchEnable = prog_data->dispatch_8;
695 wm._16PixelDispatchEnable = prog_data->dispatch_16;
696
697 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
698 }
699
700 if (params->src.addr.buffer) {
701 wm.SamplerCount = 1; /* Up to 4 samplers */
702 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
703 }
704
705 if (params->dst.surf.samples > 1) {
706 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
707 wm.MultisampleDispatchMode =
708 (prog_data && prog_data->persample_msaa_dispatch) ?
709 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
710 } else {
711 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
712 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
713 }
714 }
715
716 #endif /* GEN_GEN */
717 }
718
719
720 static void
721 blorp_emit_depth_stencil_config(struct blorp_batch *batch,
722 const struct blorp_params *params)
723 {
724 #if GEN_GEN >= 7
725 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
726 #else
727 const uint32_t mocs = 0;
728 #endif
729
730 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
731 switch (params->depth.surf.dim) {
732 case ISL_SURF_DIM_1D:
733 db.SurfaceType = SURFTYPE_1D;
734 break;
735 case ISL_SURF_DIM_2D:
736 db.SurfaceType = SURFTYPE_2D;
737 break;
738 case ISL_SURF_DIM_3D:
739 db.SurfaceType = SURFTYPE_3D;
740 break;
741 }
742
743 db.SurfaceFormat = params->depth_format;
744
745 #if GEN_GEN >= 7
746 db.DepthWriteEnable = true;
747 #endif
748
749 #if GEN_GEN <= 6
750 db.TiledSurface = true;
751 db.TileWalk = TILEWALK_YMAJOR;
752 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
753 db.SeparateStencilBufferEnable = true;
754 #endif
755
756 db.HierarchicalDepthBufferEnable = true;
757
758 db.Width = params->depth.surf.logical_level0_px.width - 1;
759 db.Height = params->depth.surf.logical_level0_px.height - 1;
760 db.RenderTargetViewExtent = db.Depth =
761 MAX2(params->depth.surf.logical_level0_px.depth,
762 params->depth.surf.logical_level0_px.array_len) - 1;
763
764 db.LOD = params->depth.view.base_level;
765 db.MinimumArrayElement = params->depth.view.base_array_layer;
766
767 db.SurfacePitch = params->depth.surf.row_pitch - 1;
768 db.SurfaceBaseAddress = params->depth.addr;
769 db.DepthBufferMOCS = mocs;
770 }
771
772 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
773 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
774 hiz.SurfaceBaseAddress = params->depth.aux_addr;
775 hiz.HierarchicalDepthBufferMOCS = mocs;
776 }
777
778 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
779 }
780
781 static uint32_t
782 blorp_emit_blend_state(struct blorp_batch *batch,
783 const struct blorp_params *params)
784 {
785 struct GENX(BLEND_STATE) blend;
786 memset(&blend, 0, sizeof(blend));
787
788 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
789 blend.Entry[i].PreBlendColorClampEnable = true;
790 blend.Entry[i].PostBlendColorClampEnable = true;
791 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
792
793 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
794 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
795 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
796 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
797 }
798
799 uint32_t offset;
800 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE,
801 GENX(BLEND_STATE_length) * 4,
802 64, &offset);
803 GENX(BLEND_STATE_pack)(NULL, state, &blend);
804
805 #if GEN_GEN >= 7
806 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
807 sp.BlendStatePointer = offset;
808 #if GEN_GEN >= 8
809 sp.BlendStatePointerValid = true;
810 #endif
811 }
812 #endif
813
814 #if GEN_GEN >= 8
815 blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
816 ps_blend.HasWriteableRT = true;
817 }
818 #endif
819
820 return offset;
821 }
822
823 static uint32_t
824 blorp_emit_color_calc_state(struct blorp_batch *batch,
825 const struct blorp_params *params)
826 {
827 uint32_t offset;
828 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE,
829 GENX(COLOR_CALC_STATE_length) * 4,
830 64, &offset);
831 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
832
833 #if GEN_GEN >= 7
834 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
835 sp.ColorCalcStatePointer = offset;
836 #if GEN_GEN >= 8
837 sp.ColorCalcStatePointerValid = true;
838 #endif
839 }
840 #endif
841
842 return offset;
843 }
844
845 static uint32_t
846 blorp_emit_depth_stencil_state(struct blorp_batch *batch,
847 const struct blorp_params *params)
848 {
849 #if GEN_GEN >= 8
850
851 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
852 blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
853 return 0;
854
855 #else /* GEN_GEN <= 7 */
856
857 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
858 * - 7.5.3.1 Depth Buffer Clear
859 * - 7.5.3.2 Depth Buffer Resolve
860 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
861 */
862 struct GENX(DEPTH_STENCIL_STATE) ds = {
863 .DepthBufferWriteEnable = true,
864 };
865
866 if (params->hiz_op == BLORP_HIZ_OP_DEPTH_RESOLVE) {
867 ds.DepthTestEnable = true;
868 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
869 }
870
871 uint32_t offset;
872 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE,
873 GENX(DEPTH_STENCIL_STATE_length) * 4,
874 64, &offset);
875 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
876
877 #if GEN_GEN >= 7
878 blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
879 sp.PointertoDEPTH_STENCIL_STATE = offset;
880 }
881 #endif
882
883 return offset;
884
885 #endif /* GEN_GEN */
886 }
887
/* Per-generation layout facts about RENDER_SURFACE_STATE. */
struct surface_state_info {
   unsigned num_dwords;   /* Size of the surface state in dwords */
   unsigned ss_align;     /* Required alignment of RENDER_SURFACE_STATE in bytes */
   unsigned reloc_dw;     /* Dword index of the surface address */
   unsigned aux_reloc_dw; /* Dword index of the auxiliary surface address */
};

/* Indexed by hardware generation (GEN_GEN). */
static const struct surface_state_info surface_state_infos[] = {
   [6] = { .num_dwords = 6,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 0 },
   [7] = { .num_dwords = 8,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 6 },
   [8] = { .num_dwords = 13, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
   [9] = { .num_dwords = 16, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
};
901
902 static void
903 blorp_emit_surface_state(struct blorp_batch *batch,
904 const struct brw_blorp_surface_info *surface,
905 uint32_t *state, uint32_t state_offset,
906 bool is_render_target)
907 {
908 const struct surface_state_info ss_info = surface_state_infos[GEN_GEN];
909
910 struct isl_surf surf = surface->surf;
911
912 if (surf.dim == ISL_SURF_DIM_1D &&
913 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
914 assert(surf.logical_level0_px.height == 1);
915 surf.dim = ISL_SURF_DIM_2D;
916 }
917
918 /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
919 enum isl_aux_usage aux_usage = surface->aux_usage;
920 if (aux_usage == ISL_AUX_USAGE_HIZ)
921 aux_usage = ISL_AUX_USAGE_NONE;
922
923 const uint32_t mocs =
924 is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
925
926 isl_surf_fill_state(batch->blorp->isl_dev, state,
927 .surf = &surf, .view = &surface->view,
928 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
929 .mocs = mocs, .clear_color = surface->clear_color);
930
931 blorp_surface_reloc(batch, state_offset + ss_info.reloc_dw * 4,
932 surface->addr, 0);
933
934 if (aux_usage != ISL_AUX_USAGE_NONE) {
935 /* On gen7 and prior, the bottom 12 bits of the MCS base address are
936 * used to store other information. This should be ok, however, because
937 * surface buffer addresses are always 4K page alinged.
938 */
939 assert((surface->aux_addr.offset & 0xfff) == 0);
940 blorp_surface_reloc(batch, state_offset + ss_info.aux_reloc_dw * 4,
941 surface->aux_addr, state[ss_info.aux_reloc_dw]);
942 }
943 }
944
945 static void
946 blorp_emit_surface_states(struct blorp_batch *batch,
947 const struct blorp_params *params)
948 {
949 uint32_t bind_offset, surface_offsets[2];
950 void *surface_maps[2];
951
952 const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
953 const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32;
954
955 unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
956 blorp_alloc_binding_table(batch, num_surfaces, ss_size, ss_align,
957 &bind_offset, surface_offsets, surface_maps);
958
959 blorp_emit_surface_state(batch, &params->dst,
960 surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
961 surface_offsets[BLORP_RENDERBUFFER_BT_INDEX], true);
962 if (params->src.addr.buffer) {
963 blorp_emit_surface_state(batch, &params->src,
964 surface_maps[BLORP_TEXTURE_BT_INDEX],
965 surface_offsets[BLORP_TEXTURE_BT_INDEX], false);
966 }
967
968 #if GEN_GEN >= 7
969 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
970 bt.PointertoPSBindingTable = bind_offset;
971 }
972 #else
973 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
974 bt.PSBindingTableChange = true;
975 bt.PointertoPSBindingTable = bind_offset;
976 }
977 #endif
978 }
979
980 static void
981 blorp_emit_sampler_state(struct blorp_batch *batch,
982 const struct blorp_params *params)
983 {
984 struct GENX(SAMPLER_STATE) sampler = {
985 .MipModeFilter = MIPFILTER_NONE,
986 .MagModeFilter = MAPFILTER_LINEAR,
987 .MinModeFilter = MAPFILTER_LINEAR,
988 .MinLOD = 0,
989 .MaxLOD = 0,
990 .TCXAddressControlMode = TCM_CLAMP,
991 .TCYAddressControlMode = TCM_CLAMP,
992 .TCZAddressControlMode = TCM_CLAMP,
993 .MaximumAnisotropy = RATIO21,
994 .RAddressMinFilterRoundingEnable = true,
995 .RAddressMagFilterRoundingEnable = true,
996 .VAddressMinFilterRoundingEnable = true,
997 .VAddressMagFilterRoundingEnable = true,
998 .UAddressMinFilterRoundingEnable = true,
999 .UAddressMagFilterRoundingEnable = true,
1000 .NonnormalizedCoordinateEnable = true,
1001 };
1002
1003 uint32_t offset;
1004 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE,
1005 GENX(SAMPLER_STATE_length) * 4,
1006 32, &offset);
1007 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
1008
1009 #if GEN_GEN >= 7
1010 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1011 ssp.PointertoPSSamplerState = offset;
1012 }
1013 #else
1014 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1015 ssp.VSSamplerStateChange = true;
1016 ssp.GSSamplerStateChange = true;
1017 ssp.PSSamplerStateChange = true;
1018 ssp.PointertoPSSamplerState = offset;
1019 }
1020 #endif
1021 }
1022
1023 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1024 static void
1025 blorp_emit_viewport_state(struct blorp_batch *batch,
1026 const struct blorp_params *params)
1027 {
1028 uint32_t cc_vp_offset;
1029
1030 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE,
1031 GENX(CC_VIEWPORT_length) * 4, 32,
1032 &cc_vp_offset);
1033
1034 GENX(CC_VIEWPORT_pack)(batch, state,
1035 &(struct GENX(CC_VIEWPORT)) {
1036 .MinimumDepth = 0.0,
1037 .MaximumDepth = 1.0,
1038 });
1039
1040 #if GEN_GEN >= 7
1041 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1042 vsp.CCViewportPointer = cc_vp_offset;
1043 }
1044 #else
1045 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1046 vsp.CCViewportStateChange = true;
1047 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1048 }
1049 #endif
1050 }
1051
1052
1053 /**
1054 * \brief Execute a blit or render pass operation.
1055 *
1056 * To execute the operation, this function manually constructs and emits a
1057 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1058 * constructing and after emitting the batch.
1059 *
1060 * This function alters no GL state.
1061 */
1062 static void
1063 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
1064 {
1065 uint32_t blend_state_offset = 0;
1066 uint32_t color_calc_state_offset = 0;
1067 uint32_t depth_stencil_state_offset;
1068
1069 blorp_emit_vertex_buffers(batch, params);
1070 blorp_emit_vertex_elements(batch, params);
1071
1072 emit_urb_config(batch, params);
1073
1074 if (params->wm_prog_data) {
1075 blend_state_offset = blorp_emit_blend_state(batch, params);
1076 color_calc_state_offset = blorp_emit_color_calc_state(batch, params);
1077 }
1078 depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);
1079
1080 #if GEN_GEN <= 6
1081 /* 3DSTATE_CC_STATE_POINTERS
1082 *
1083 * The pointer offsets are relative to
1084 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1085 *
1086 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1087 *
1088 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1089 * gen7+. However, on gen6 and earlier, they're all lumpped together in
1090 * one CC_STATE_POINTERS packet so we have to emit that here.
1091 */
1092 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1093 cc.BLEND_STATEChange = true;
1094 cc.COLOR_CALC_STATEChange = true;
1095 cc.DEPTH_STENCIL_STATEChange = true;
1096 cc.PointertoBLEND_STATE = blend_state_offset;
1097 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1098 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1099 }
1100 #else
1101 (void)blend_state_offset;
1102 (void)color_calc_state_offset;
1103 (void)depth_stencil_state_offset;
1104 #endif
1105
1106 blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
1107 #if GEN_GEN >= 7
1108 blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
1109 blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
1110 #endif
1111 blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
1112 blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
1113
1114 if (params->wm_prog_data)
1115 blorp_emit_surface_states(batch, params);
1116
1117 if (params->src.addr.buffer)
1118 blorp_emit_sampler_state(batch, params);
1119
1120 blorp_emit_3dstate_multisample(batch, params->dst.surf.samples);
1121
1122 blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
1123 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1124 }
1125
1126 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1127 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1128 *
1129 * [DevSNB] A pipeline flush must be programmed prior to a
1130 * 3DSTATE_VS command that causes the VS Function Enable to
1131 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1132 * command with CS stall bit set and a post sync operation.
1133 *
1134 * We've already done one at the start of the BLORP operation.
1135 */
1136 blorp_emit(batch, GENX(3DSTATE_VS), vs);
1137 #if GEN_GEN >= 7
1138 blorp_emit(batch, GENX(3DSTATE_HS), hs);
1139 blorp_emit(batch, GENX(3DSTATE_TE), te);
1140 blorp_emit(batch, GENX(3DSTATE_DS), DS);
1141 blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
1142 #endif
1143 blorp_emit(batch, GENX(3DSTATE_GS), gs);
1144
1145 blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
1146 clip.PerspectiveDivideDisable = true;
1147 }
1148
1149 blorp_emit_sf_config(batch, params);
1150 blorp_emit_ps_config(batch, params);
1151
1152 blorp_emit_viewport_state(batch, params);
1153
1154 if (params->depth.addr.buffer) {
1155 blorp_emit_depth_stencil_config(batch, params);
1156 } else {
1157 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
1158 db.SurfaceType = SURFTYPE_NULL;
1159 db.SurfaceFormat = D32_FLOAT;
1160 }
1161 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1162 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
1163 }
1164
1165 /* 3DSTATE_CLEAR_PARAMS
1166 *
1167 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1168 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1169 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1170 */
1171 blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1172 clear.DepthClearValueValid = true;
1173 clear.DepthClearValue = params->depth.clear_color.u32[0];
1174 }
1175
1176 blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1177 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1178 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1179 }
1180
1181 blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
1182 prim.VertexAccessType = SEQUENTIAL;
1183 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1184 prim.VertexCountPerInstance = 3;
1185 prim.InstanceCount = params->num_layers;
1186 }
1187 }