intel: Add a new "common" library for more code sharing
[mesa.git] / src / intel / blorp / blorp_genX_exec.h
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "blorp_priv.h"
25 #include "common/brw_device_info.h"
26 #include "intel_aub.h"
27
28 /**
29 * This file provides the blorp pipeline setup and execution functionality.
30 * It defines the following function:
31 *
32 * static void
33 * blorp_exec(struct blorp_context *blorp, void *batch_data,
34 * const struct blorp_params *params);
35 *
36 * It is the job of whoever includes this header to wrap this in something
37 * to get an externally visible symbol.
38 *
39 * In order for the blorp_exec function to work, the driver must provide
40 * implementations of the following static helper functions.
41 */
42
/* Returns the location into which blorp_emit()/blorp_emitn() pack a command
 * that is n dwords long.
 */
static void *
blorp_emit_dwords(struct blorp_batch *batch,
                  unsigned n);

/* Invoked by __gen_combine_address() for every address whose buffer is not
 * NULL; the returned value is what ends up packed as the address field
 * stored at `location`.
 */
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location,
                 struct blorp_address address,
                 uint32_t delta);

/* Returns a CPU pointer to `size` bytes of state with the requested
 * alignment and stores in *offset the value that callers later program into
 * the matching state-pointer packet.
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          enum aub_state_struct_type type,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset);

/* Returns a CPU pointer that callers fill with `size` bytes of vertex data
 * and writes to *addr the address used as the buffer's starting address.
 */
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch,
                          uint32_t size,
                          struct blorp_address *addr);

/* Provides a binding table with num_entries entries.  On return *bt_offset
 * is programmed as the PS binding table pointer, (*bt_map)[i] is used as the
 * offset of surface state i and surface_maps[i] is where surface state i is
 * written.
 */
static void
blorp_alloc_binding_table(struct blorp_batch *batch,
                          unsigned num_entries,
                          unsigned state_size,
                          unsigned state_alignment,
                          uint32_t *bt_offset,
                          uint32_t **bt_map,
                          void **surface_maps);

/* Called with the offset of an address dword inside a surface state
 * (ss_offset), the address that dword refers to and a delta to add to it.
 */
static void
blorp_surface_reloc(struct blorp_batch *batch,
                    uint32_t ss_offset,
                    struct blorp_address address,
                    uint32_t delta);

/* Called by emit_urb_config() with the VS URB entry size expressed in units
 * of 64 bytes.
 */
static void
blorp_emit_urb_config(struct blorp_batch *batch,
                      unsigned vs_entry_size);

static void
blorp_emit_3dstate_multisample(struct blorp_batch *batch,
                               unsigned samples);
73
74 /***** BEGIN blorp_exec implementation ******/
75
76 #include "genxml/gen_macros.h"
77
78 #define __gen_address_type struct blorp_address
79 #define __gen_user_data struct blorp_batch
80
81 static uint64_t
82 __gen_combine_address(struct blorp_batch *batch, void *location,
83 struct blorp_address address, uint32_t delta)
84 {
85 if (address.buffer == NULL) {
86 return address.offset + delta;
87 } else {
88 return blorp_emit_reloc(batch, location, address, delta);
89 }
90 }
91
92 #include "genxml/genX_pack.h"
93
/* Helpers that derive the genxml-generated identifiers for a command.  The
 * extra level of indirection lets `cmd` (typically a GENX(...) invocation)
 * be expanded before it is token-pasted.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.
 *
 * Expands to a for-statement whose body runs at most once: `name` is a
 * struct initialized with the command header that the body fills in, and
 * once the body finishes the struct is packed into the dwords obtained from
 * blorp_emit_dwords().  If blorp_emit_dwords() returns NULL the body is
 * skipped and nothing is packed.
 */
#define blorp_emit(batch, cmd, name)                                 \
   for (struct cmd name = { _blorp_cmd_header(cmd) },                \
        *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd));    \
        __builtin_expect(_dst != NULL, 1);                           \
        _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name),            \
        _dst = NULL)

/* Emit the header of a variable-length command that is `n` dwords long in
 * total and evaluate to a pointer to dword 1, where the caller packs the
 * array payload.
 *
 * `n` is parenthesized wherever it is used so that any expression can be
 * passed as the length.  It is still evaluated twice, so it must not have
 * side effects.
 */
#define blorp_emitn(batch, cmd, n) ({                                \
      uint32_t *_dw = blorp_emit_dwords(batch, (n));                 \
      struct cmd template = {                                        \
         _blorp_cmd_header(cmd),                                     \
         .DWordLength = (n) - _blorp_cmd_length_bias(cmd),           \
      };                                                             \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);                   \
      _dw + 1; /* Array starts at dw[1] */                           \
   })
115
116 /* 3DSTATE_URB
117 * 3DSTATE_URB_VS
118 * 3DSTATE_URB_HS
119 * 3DSTATE_URB_DS
120 * 3DSTATE_URB_GS
121 *
122 * Assign the entire URB to the VS. Even though the VS disabled, URB space
123 * is still needed because the clipper loads the VUE's from the URB. From
124 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
125 * Dword 1.15:0 "VS Number of URB Entries":
126 * This field is always used (even if VS Function Enable is DISABLED).
127 *
128 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
129 * safely ignore it because this batch contains only one draw call.
130 * Because of URB corruption caused by allocating a previous GS unit
131 * URB entry to the VS unit, software is required to send a “GS NULL
132 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
133 * plus a dummy DRAW call before any case where VS will be taking over
134 * GS URB space.
135 *
136 * If the 3DSTATE_URB_VS is emitted, than the others must be also.
137 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
138 *
139 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
140 * programmed in order for the programming of this state to be
141 * valid.
142 */
143 static void
144 emit_urb_config(struct blorp_batch *batch,
145 const struct blorp_params *params)
146 {
147 /* Once vertex fetcher has written full VUE entries with complete
148 * header the space requirement is as follows per vertex (in bytes):
149 *
150 * Header Position Program constants
151 * +--------+------------+-------------------+
152 * | 16 | 16 | n x 16 |
153 * +--------+------------+-------------------+
154 *
155 * where 'n' stands for number of varying inputs expressed as vec4s.
156 */
157 const unsigned num_varyings =
158 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
159 const unsigned total_needed = 16 + 16 + num_varyings * 16;
160
161 /* The URB size is expressed in units of 64 bytes (512 bits) */
162 const unsigned vs_entry_size = DIV_ROUND_UP(total_needed, 64);
163
164 blorp_emit_urb_config(batch, vs_entry_size);
165 }
166
167 static void
168 blorp_emit_vertex_data(struct blorp_batch *batch,
169 const struct blorp_params *params,
170 struct blorp_address *addr,
171 uint32_t *size)
172 {
173 const float vertices[] = {
174 /* v0 */ (float)params->x0, (float)params->y1,
175 /* v1 */ (float)params->x1, (float)params->y1,
176 /* v2 */ (float)params->x0, (float)params->y0,
177 };
178
179 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
180 memcpy(data, vertices, sizeof(vertices));
181 *size = sizeof(vertices);
182 }
183
184 static void
185 blorp_emit_input_varying_data(struct blorp_batch *batch,
186 const struct blorp_params *params,
187 struct blorp_address *addr,
188 uint32_t *size)
189 {
190 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
191 const unsigned max_num_varyings =
192 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
193 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
194
195 *size = num_varyings * vec4_size_in_bytes;
196
197 const float *const inputs_src = (const float *)&params->wm_inputs;
198 float *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
199
200 /* Walk over the attribute slots, determine if the attribute is used by
201 * the program and when necessary copy the values from the input storage to
202 * the vertex data buffer.
203 */
204 for (unsigned i = 0; i < max_num_varyings; i++) {
205 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
206
207 if (!(params->wm_prog_data->inputs_read & (1ull << attr)))
208 continue;
209
210 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
211
212 inputs += 4;
213 }
214 }
215
216 static void
217 blorp_emit_vertex_buffers(struct blorp_batch *batch,
218 const struct blorp_params *params)
219 {
220 struct GENX(VERTEX_BUFFER_STATE) vb[2];
221 memset(vb, 0, sizeof(vb));
222
223 unsigned num_buffers = 1;
224
225 uint32_t size;
226 blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
227 vb[0].VertexBufferIndex = 0;
228 vb[0].BufferPitch = 2 * sizeof(float);
229 vb[0].VertexBufferMOCS = batch->blorp->mocs.vb;
230 #if GEN_GEN >= 7
231 vb[0].AddressModifyEnable = true;
232 #endif
233 #if GEN_GEN >= 8
234 vb[0].BufferSize = size;
235 #else
236 vb[0].BufferAccessType = VERTEXDATA;
237 vb[0].EndAddress = vb[0].BufferStartingAddress;
238 vb[0].EndAddress.offset += size - 1;
239 #endif
240
241 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
242 blorp_emit_input_varying_data(batch, params,
243 &vb[1].BufferStartingAddress, &size);
244 vb[1].VertexBufferIndex = 1;
245 vb[1].BufferPitch = 0;
246 vb[1].VertexBufferMOCS = batch->blorp->mocs.vb;
247 #if GEN_GEN >= 7
248 vb[1].AddressModifyEnable = true;
249 #endif
250 #if GEN_GEN >= 8
251 vb[1].BufferSize = size;
252 #else
253 vb[1].BufferAccessType = INSTANCEDATA;
254 vb[1].EndAddress = vb[1].BufferStartingAddress;
255 vb[1].EndAddress.offset += size - 1;
256 #endif
257 num_buffers++;
258 }
259
260 const unsigned num_dwords =
261 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
262 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
263
264 for (unsigned i = 0; i < num_buffers; i++) {
265 GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
266 dw += GENX(VERTEX_BUFFER_STATE_length);
267 }
268 }
269
270 static void
271 blorp_emit_vertex_elements(struct blorp_batch *batch,
272 const struct blorp_params *params)
273 {
274 const unsigned num_varyings =
275 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
276 const unsigned num_elements = 2 + num_varyings;
277
278 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
279 memset(ve, 0, num_elements * sizeof(*ve));
280
281 /* Setup VBO for the rectangle primitive..
282 *
283 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
284 * vertices. The vertices reside in screen space with DirectX
285 * coordinates (that is, (0, 0) is the upper left corner).
286 *
287 * v2 ------ implied
288 * | |
289 * | |
290 * v0 ----- v1
291 *
292 * Since the VS is disabled, the clipper loads each VUE directly from
293 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
294 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
295 * dw0: Reserved, MBZ.
296 * dw1: Render Target Array Index. The HiZ op does not use indexed
297 * vertices, so set the dword to 0.
298 * dw2: Viewport Index. The HiZ op disables viewport mapping and
299 * scissoring, so set the dword to 0.
300 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
301 * so set the dword to 0.
302 * dw4: Vertex Position X.
303 * dw5: Vertex Position Y.
304 * dw6: Vertex Position Z.
305 * dw7: Vertex Position W.
306 *
307 * dw8: Flat vertex input 0
308 * dw9: Flat vertex input 1
309 * ...
310 * dwn: Flat vertex input n - 8
311 *
312 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
313 * "Vertex URB Entry (VUE) Formats".
314 *
315 * Only vertex position X and Y are going to be variable, Z is fixed to
316 * zero and W to one. Header words dw0-3 are all zero. There is no need to
317 * include the fixed values in the vertex buffer. Vertex fetcher can be
318 * instructed to fill vertex elements with constant values of one and zero
319 * instead of reading them from the buffer.
320 * Flat inputs are program constants that are not interpolated. Moreover
321 * their values will be the same between vertices.
322 *
323 * See the vertex element setup below.
324 */
325 ve[0].VertexBufferIndex = 0;
326 ve[0].Valid = true;
327 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
328 ve[0].SourceElementOffset = 0;
329 ve[0].Component0Control = VFCOMP_STORE_0;
330 ve[0].Component1Control = VFCOMP_STORE_0;
331 ve[0].Component2Control = VFCOMP_STORE_0;
332 ve[0].Component3Control = VFCOMP_STORE_0;
333
334 ve[1].VertexBufferIndex = 0;
335 ve[1].Valid = true;
336 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
337 ve[1].SourceElementOffset = 0;
338 ve[1].Component0Control = VFCOMP_STORE_SRC;
339 ve[1].Component1Control = VFCOMP_STORE_SRC;
340 ve[1].Component2Control = VFCOMP_STORE_0;
341 ve[1].Component3Control = VFCOMP_STORE_1_FP;
342
343 for (unsigned i = 0; i < num_varyings; ++i) {
344 ve[i + 2].VertexBufferIndex = 1;
345 ve[i + 2].Valid = true;
346 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
347 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
348 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
349 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
350 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
351 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
352 }
353
354 const unsigned num_dwords =
355 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
356 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
357
358 for (unsigned i = 0; i < num_elements; i++) {
359 GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);
360 dw += GENX(VERTEX_ELEMENT_STATE_length);
361 }
362
363 #if GEN_GEN >= 8
364 blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
365
366 for (unsigned i = 0; i < num_elements; i++) {
367 blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
368 vf.VertexElementIndex = i;
369 vf.InstancingEnable = false;
370 }
371 }
372
373 blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
374 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
375 }
376 #endif
377 }
378
379 static void
380 blorp_emit_sf_config(struct blorp_batch *batch,
381 const struct blorp_params *params)
382 {
383 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
384
385 /* 3DSTATE_SF
386 *
387 * Disable ViewportTransformEnable (dw2.1)
388 *
389 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
390 * Primitives Overview":
391 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
392 * use of screen- space coordinates).
393 *
394 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
395 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
396 *
397 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
398 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
399 * SOLID: Any triangle or rectangle object found to be front-facing
400 * is rendered as a solid object. This setting is required when
401 * (rendering rectangle (RECTLIST) objects.
402 */
403
404 #if GEN_GEN >= 8
405
406 blorp_emit(batch, GENX(3DSTATE_SF), sf);
407
408 blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
409 raster.CullMode = CULLMODE_NONE;
410 }
411
412 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
413 sbe.VertexURBEntryReadOffset = 1;
414 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
415 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
416 sbe.ForceVertexURBEntryReadLength = true;
417 sbe.ForceVertexURBEntryReadOffset = true;
418 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
419
420 #if GEN_GEN >= 9
421 for (unsigned i = 0; i < 32; i++)
422 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
423 #endif
424 }
425
426 #elif GEN_GEN >= 7
427
428 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
429 sf.FrontFaceFillMode = FILL_MODE_SOLID;
430 sf.BackFaceFillMode = FILL_MODE_SOLID;
431
432 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
433 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
434
435 #if GEN_GEN == 7
436 sf.DepthBufferSurfaceFormat = params->depth_format;
437 #endif
438 }
439
440 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
441 sbe.VertexURBEntryReadOffset = 1;
442 if (prog_data) {
443 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
444 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
445 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
446 } else {
447 sbe.NumberofSFOutputAttributes = 0;
448 sbe.VertexURBEntryReadLength = 1;
449 }
450 }
451
452 #else /* GEN_GEN <= 6 */
453
454 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
455 sf.FrontFaceFillMode = FILL_MODE_SOLID;
456 sf.BackFaceFillMode = FILL_MODE_SOLID;
457
458 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
459 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
460
461 sf.VertexURBEntryReadOffset = 1;
462 if (prog_data) {
463 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
464 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
465 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
466 } else {
467 sf.NumberofSFOutputAttributes = 0;
468 sf.VertexURBEntryReadLength = 1;
469 }
470 }
471
472 #endif /* GEN_GEN */
473 }
474
475 static void
476 blorp_emit_ps_config(struct blorp_batch *batch,
477 const struct blorp_params *params)
478 {
479 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
480
481 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
482 * nonzero to prevent the GPU from hanging. While the documentation doesn't
483 * mention this explicitly, it notes that the valid range for the field is
484 * [1,39] = [2,40] threads, which excludes zero.
485 *
486 * To be safe (and to minimize extraneous code) we go ahead and fully
487 * configure the WM state whether or not there is a WM program.
488 */
489
490 #if GEN_GEN >= 8
491
492 blorp_emit(batch, GENX(3DSTATE_WM), wm);
493
494 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
495 if (params->src.addr.buffer) {
496 ps.SamplerCount = 1; /* Up to 4 samplers */
497 ps.BindingTableEntryCount = 2;
498 } else {
499 ps.BindingTableEntryCount = 1;
500 }
501
502 ps.DispatchGRFStartRegisterForConstantSetupData0 =
503 prog_data->first_curbe_grf_0;
504 ps.DispatchGRFStartRegisterForConstantSetupData2 =
505 prog_data->first_curbe_grf_2;
506
507 ps._8PixelDispatchEnable = prog_data->dispatch_8;
508 ps._16PixelDispatchEnable = prog_data->dispatch_16;
509
510 ps.KernelStartPointer0 = params->wm_prog_kernel;
511 ps.KernelStartPointer2 =
512 params->wm_prog_kernel + prog_data->ksp_offset_2;
513
514 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
515 * it implicitly scales for different GT levels (which have some # of
516 * PSDs).
517 *
518 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
519 */
520 if (GEN_GEN >= 9)
521 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
522 else
523 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
524
525 switch (params->fast_clear_op) {
526 case BLORP_FAST_CLEAR_OP_NONE:
527 break;
528 #if GEN_GEN >= 9
529 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
530 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
531 break;
532 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
533 ps.RenderTargetResolveType = RESOLVE_FULL;
534 break;
535 #else
536 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
537 ps.RenderTargetResolveEnable = true;
538 break;
539 #endif
540 case BLORP_FAST_CLEAR_OP_CLEAR:
541 ps.RenderTargetFastClearEnable = true;
542 break;
543 default:
544 unreachable("Invalid fast clear op");
545 }
546 }
547
548 blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
549 psx.PixelShaderValid = true;
550
551 if (params->src.addr.buffer)
552 psx.PixelShaderKillsPixel = true;
553
554 psx.AttributeEnable = prog_data->num_varying_inputs > 0;
555
556 if (prog_data && prog_data->persample_msaa_dispatch)
557 psx.PixelShaderIsPerSample = true;
558 }
559
560 #elif GEN_GEN >= 7
561
562 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
563 switch (params->hiz_op) {
564 case BLORP_HIZ_OP_DEPTH_CLEAR:
565 wm.DepthBufferClear = true;
566 break;
567 case BLORP_HIZ_OP_DEPTH_RESOLVE:
568 wm.DepthBufferResolveEnable = true;
569 break;
570 case BLORP_HIZ_OP_HIZ_RESOLVE:
571 wm.HierarchicalDepthBufferResolveEnable = true;
572 break;
573 case BLORP_HIZ_OP_NONE:
574 break;
575 default:
576 unreachable("not reached");
577 }
578
579 if (prog_data)
580 wm.ThreadDispatchEnable = true;
581
582 if (params->src.addr.buffer)
583 wm.PixelShaderKillPixel = true;
584
585 if (params->dst.surf.samples > 1) {
586 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
587 wm.MultisampleDispatchMode =
588 (prog_data && prog_data->persample_msaa_dispatch) ?
589 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
590 } else {
591 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
592 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
593 }
594 }
595
596 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
597 ps.MaximumNumberofThreads =
598 batch->blorp->isl_dev->info->max_wm_threads - 1;
599
600 #if GEN_IS_HASWELL
601 ps.SampleMask = 1;
602 #endif
603
604 if (prog_data) {
605 ps.DispatchGRFStartRegisterforConstantSetupData0 =
606 prog_data->first_curbe_grf_0;
607 ps.DispatchGRFStartRegisterforConstantSetupData2 =
608 prog_data->first_curbe_grf_2;
609
610 ps.KernelStartPointer0 = params->wm_prog_kernel;
611 ps.KernelStartPointer2 =
612 params->wm_prog_kernel + prog_data->ksp_offset_2;
613
614 ps._8PixelDispatchEnable = prog_data->dispatch_8;
615 ps._16PixelDispatchEnable = prog_data->dispatch_16;
616
617 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
618 } else {
619 /* Gen7 hardware gets angry if we don't enable at least one dispatch
620 * mode, so just enable 16-pixel dispatch if we don't have a program.
621 */
622 ps._16PixelDispatchEnable = true;
623 }
624
625 if (params->src.addr.buffer)
626 ps.SamplerCount = 1; /* Up to 4 samplers */
627
628 switch (params->fast_clear_op) {
629 case BLORP_FAST_CLEAR_OP_NONE:
630 break;
631 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
632 ps.RenderTargetResolveEnable = true;
633 break;
634 case BLORP_FAST_CLEAR_OP_CLEAR:
635 ps.RenderTargetFastClearEnable = true;
636 break;
637 default:
638 unreachable("Invalid fast clear op");
639 }
640 }
641
642 #else /* GEN_GEN <= 6 */
643
644 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
645 wm.MaximumNumberofThreads =
646 batch->blorp->isl_dev->info->max_wm_threads - 1;
647
648 switch (params->hiz_op) {
649 case BLORP_HIZ_OP_DEPTH_CLEAR:
650 wm.DepthBufferClear = true;
651 break;
652 case BLORP_HIZ_OP_DEPTH_RESOLVE:
653 wm.DepthBufferResolveEnable = true;
654 break;
655 case BLORP_HIZ_OP_HIZ_RESOLVE:
656 wm.HierarchicalDepthBufferResolveEnable = true;
657 break;
658 case BLORP_HIZ_OP_NONE:
659 break;
660 default:
661 unreachable("not reached");
662 }
663
664 if (prog_data) {
665 wm.ThreadDispatchEnable = true;
666
667 wm.DispatchGRFStartRegisterforConstantSetupData0 =
668 prog_data->first_curbe_grf_0;
669 wm.DispatchGRFStartRegisterforConstantSetupData2 =
670 prog_data->first_curbe_grf_2;
671
672 wm.KernelStartPointer0 = params->wm_prog_kernel;
673 wm.KernelStartPointer2 =
674 params->wm_prog_kernel + prog_data->ksp_offset_2;
675
676 wm._8PixelDispatchEnable = prog_data->dispatch_8;
677 wm._16PixelDispatchEnable = prog_data->dispatch_16;
678
679 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
680 }
681
682 if (params->src.addr.buffer) {
683 wm.SamplerCount = 1; /* Up to 4 samplers */
684 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
685 }
686
687 if (params->dst.surf.samples > 1) {
688 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
689 wm.MultisampleDispatchMode =
690 (prog_data && prog_data->persample_msaa_dispatch) ?
691 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
692 } else {
693 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
694 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
695 }
696 }
697
698 #endif /* GEN_GEN */
699 }
700
701
702 static void
703 blorp_emit_depth_stencil_config(struct blorp_batch *batch,
704 const struct blorp_params *params)
705 {
706 #if GEN_GEN >= 7
707 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
708 #else
709 const uint32_t mocs = 0;
710 #endif
711
712 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
713 switch (params->depth.surf.dim) {
714 case ISL_SURF_DIM_1D:
715 db.SurfaceType = SURFTYPE_1D;
716 break;
717 case ISL_SURF_DIM_2D:
718 db.SurfaceType = SURFTYPE_2D;
719 break;
720 case ISL_SURF_DIM_3D:
721 db.SurfaceType = SURFTYPE_3D;
722 break;
723 }
724
725 db.SurfaceFormat = params->depth_format;
726
727 #if GEN_GEN >= 7
728 db.DepthWriteEnable = true;
729 #endif
730
731 #if GEN_GEN <= 6
732 db.TiledSurface = true;
733 db.TileWalk = TILEWALK_YMAJOR;
734 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
735 db.SeparateStencilBufferEnable = true;
736 #endif
737
738 db.HierarchicalDepthBufferEnable = true;
739
740 db.Width = params->depth.surf.logical_level0_px.width - 1;
741 db.Height = params->depth.surf.logical_level0_px.height - 1;
742 db.RenderTargetViewExtent = db.Depth =
743 MAX2(params->depth.surf.logical_level0_px.depth,
744 params->depth.surf.logical_level0_px.array_len) - 1;
745
746 db.LOD = params->depth.view.base_level;
747 db.MinimumArrayElement = params->depth.view.base_array_layer;
748
749 db.SurfacePitch = params->depth.surf.row_pitch - 1;
750 db.SurfaceBaseAddress = params->depth.addr;
751 db.DepthBufferMOCS = mocs;
752 }
753
754 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
755 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
756 hiz.SurfaceBaseAddress = params->depth.aux_addr;
757 hiz.HierarchicalDepthBufferMOCS = mocs;
758 }
759
760 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
761 }
762
763 static uint32_t
764 blorp_emit_blend_state(struct blorp_batch *batch,
765 const struct blorp_params *params)
766 {
767 struct GENX(BLEND_STATE) blend;
768 memset(&blend, 0, sizeof(blend));
769
770 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
771 blend.Entry[i].PreBlendColorClampEnable = true;
772 blend.Entry[i].PostBlendColorClampEnable = true;
773 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
774
775 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
776 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
777 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
778 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
779 }
780
781 uint32_t offset;
782 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE,
783 GENX(BLEND_STATE_length) * 4,
784 64, &offset);
785 GENX(BLEND_STATE_pack)(NULL, state, &blend);
786
787 #if GEN_GEN >= 7
788 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
789 sp.BlendStatePointer = offset;
790 #if GEN_GEN >= 8
791 sp.BlendStatePointerValid = true;
792 #endif
793 }
794 #endif
795
796 #if GEN_GEN >= 8
797 blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
798 ps_blend.HasWriteableRT = true;
799 }
800 #endif
801
802 return offset;
803 }
804
805 static uint32_t
806 blorp_emit_color_calc_state(struct blorp_batch *batch,
807 const struct blorp_params *params)
808 {
809 uint32_t offset;
810 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE,
811 GENX(COLOR_CALC_STATE_length) * 4,
812 64, &offset);
813 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
814
815 #if GEN_GEN >= 7
816 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
817 sp.ColorCalcStatePointer = offset;
818 #if GEN_GEN >= 8
819 sp.ColorCalcStatePointerValid = true;
820 #endif
821 }
822 #endif
823
824 return offset;
825 }
826
827 static uint32_t
828 blorp_emit_depth_stencil_state(struct blorp_batch *batch,
829 const struct blorp_params *params)
830 {
831 #if GEN_GEN >= 8
832
833 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
834 blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
835 return 0;
836
837 #else /* GEN_GEN <= 7 */
838
839 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
840 * - 7.5.3.1 Depth Buffer Clear
841 * - 7.5.3.2 Depth Buffer Resolve
842 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
843 */
844 struct GENX(DEPTH_STENCIL_STATE) ds = {
845 .DepthBufferWriteEnable = true,
846 };
847
848 if (params->hiz_op == BLORP_HIZ_OP_DEPTH_RESOLVE) {
849 ds.DepthTestEnable = true;
850 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
851 }
852
853 uint32_t offset;
854 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE,
855 GENX(DEPTH_STENCIL_STATE_length) * 4,
856 64, &offset);
857 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
858
859 #if GEN_GEN >= 7
860 blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
861 sp.PointertoDEPTH_STENCIL_STATE = offset;
862 }
863 #endif
864
865 return offset;
866
867 #endif /* GEN_GEN */
868 }
869
/* Per-generation layout facts about RENDER_SURFACE_STATE. */
struct surface_state_info {
   unsigned num_dwords;
   unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
   unsigned reloc_dw;     /* Dword index of the main surface address */
   unsigned aux_reloc_dw; /* Dword index of the auxiliary surface address */
};

/* Indexed by hardware generation (GEN_GEN). */
static const struct surface_state_info surface_state_infos[] = {
   [6] = { .num_dwords = 6,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 0 },
   [7] = { .num_dwords = 8,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 6 },
   [8] = { .num_dwords = 13, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
   [9] = { .num_dwords = 16, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
};
883
884 static void
885 blorp_emit_surface_state(struct blorp_batch *batch,
886 const struct brw_blorp_surface_info *surface,
887 uint32_t *state, uint32_t state_offset,
888 bool is_render_target)
889 {
890 const struct surface_state_info ss_info = surface_state_infos[GEN_GEN];
891
892 struct isl_surf surf = surface->surf;
893
894 if (surf.dim == ISL_SURF_DIM_1D &&
895 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
896 assert(surf.logical_level0_px.height == 1);
897 surf.dim = ISL_SURF_DIM_2D;
898 }
899
900 /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
901 enum isl_aux_usage aux_usage = surface->aux_usage;
902 if (aux_usage == ISL_AUX_USAGE_HIZ)
903 aux_usage = ISL_AUX_USAGE_NONE;
904
905 const uint32_t mocs =
906 is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
907
908 isl_surf_fill_state(batch->blorp->isl_dev, state,
909 .surf = &surf, .view = &surface->view,
910 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
911 .mocs = mocs, .clear_color = surface->clear_color,
912 .x_offset_sa = surface->tile_x_sa,
913 .y_offset_sa = surface->tile_y_sa);
914
915 blorp_surface_reloc(batch, state_offset + ss_info.reloc_dw * 4,
916 surface->addr, 0);
917
918 if (aux_usage != ISL_AUX_USAGE_NONE) {
919 /* On gen7 and prior, the bottom 12 bits of the MCS base address are
920 * used to store other information. This should be ok, however, because
921 * surface buffer addresses are always 4K page alinged.
922 */
923 assert((surface->aux_addr.offset & 0xfff) == 0);
924 blorp_surface_reloc(batch, state_offset + ss_info.aux_reloc_dw * 4,
925 surface->aux_addr, state[ss_info.aux_reloc_dw]);
926 }
927 }
928
929 static void
930 blorp_emit_surface_states(struct blorp_batch *batch,
931 const struct blorp_params *params)
932 {
933 uint32_t bind_offset, *bind_map;
934 void *surface_maps[2];
935
936 const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
937 const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32;
938
939 unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
940 blorp_alloc_binding_table(batch, num_surfaces, ss_size, ss_align,
941 &bind_offset, &bind_map, surface_maps);
942
943 blorp_emit_surface_state(batch, &params->dst,
944 surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
945 bind_map[BLORP_RENDERBUFFER_BT_INDEX], true);
946 if (params->src.addr.buffer) {
947 blorp_emit_surface_state(batch, &params->src,
948 surface_maps[BLORP_TEXTURE_BT_INDEX],
949 bind_map[BLORP_TEXTURE_BT_INDEX], false);
950 }
951
952 #if GEN_GEN >= 7
953 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
954 bt.PointertoPSBindingTable = bind_offset;
955 }
956 #else
957 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
958 bt.PSBindingTableChange = true;
959 bt.PointertoPSBindingTable = bind_offset;
960 }
961 #endif
962 }
963
964 static void
965 blorp_emit_sampler_state(struct blorp_batch *batch,
966 const struct blorp_params *params)
967 {
968 struct GENX(SAMPLER_STATE) sampler = {
969 .MipModeFilter = MIPFILTER_NONE,
970 .MagModeFilter = MAPFILTER_LINEAR,
971 .MinModeFilter = MAPFILTER_LINEAR,
972 .MinLOD = 0,
973 .MaxLOD = 0,
974 .TCXAddressControlMode = TCM_CLAMP,
975 .TCYAddressControlMode = TCM_CLAMP,
976 .TCZAddressControlMode = TCM_CLAMP,
977 .MaximumAnisotropy = RATIO21,
978 .RAddressMinFilterRoundingEnable = true,
979 .RAddressMagFilterRoundingEnable = true,
980 .VAddressMinFilterRoundingEnable = true,
981 .VAddressMagFilterRoundingEnable = true,
982 .UAddressMinFilterRoundingEnable = true,
983 .UAddressMagFilterRoundingEnable = true,
984 .NonnormalizedCoordinateEnable = true,
985 };
986
987 uint32_t offset;
988 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE,
989 GENX(SAMPLER_STATE_length) * 4,
990 32, &offset);
991 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
992
993 #if GEN_GEN >= 7
994 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
995 ssp.PointertoPSSamplerState = offset;
996 }
997 #else
998 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
999 ssp.VSSamplerStateChange = true;
1000 ssp.GSSamplerStateChange = true;
1001 ssp.PSSamplerStateChange = true;
1002 ssp.PointertoPSSamplerState = offset;
1003 }
1004 #endif
1005 }
1006
1007 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1008 static void
1009 blorp_emit_viewport_state(struct blorp_batch *batch,
1010 const struct blorp_params *params)
1011 {
1012 uint32_t cc_vp_offset;
1013
1014 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE,
1015 GENX(CC_VIEWPORT_length) * 4, 32,
1016 &cc_vp_offset);
1017
1018 GENX(CC_VIEWPORT_pack)(batch, state,
1019 &(struct GENX(CC_VIEWPORT)) {
1020 .MinimumDepth = 0.0,
1021 .MaximumDepth = 1.0,
1022 });
1023
1024 #if GEN_GEN >= 7
1025 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1026 vsp.CCViewportPointer = cc_vp_offset;
1027 }
1028 #else
1029 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1030 vsp.CCViewportStateChange = true;
1031 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1032 }
1033 #endif
1034 }
1035
1036
1037 /**
1038 * \brief Execute a blit or render pass operation.
1039 *
1040 * To execute the operation, this function manually constructs and emits a
1041 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1042 * constructing and after emitting the batch.
1043 *
1044 * This function alters no GL state.
1045 */
1046 static void
1047 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
1048 {
1049 uint32_t blend_state_offset = 0;
1050 uint32_t color_calc_state_offset = 0;
1051 uint32_t depth_stencil_state_offset;
1052
1053 blorp_emit_vertex_buffers(batch, params);
1054 blorp_emit_vertex_elements(batch, params);
1055
1056 emit_urb_config(batch, params);
1057
1058 if (params->wm_prog_data) {
1059 blend_state_offset = blorp_emit_blend_state(batch, params);
1060 color_calc_state_offset = blorp_emit_color_calc_state(batch, params);
1061 }
1062 depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);
1063
1064 #if GEN_GEN <= 6
1065 /* 3DSTATE_CC_STATE_POINTERS
1066 *
1067 * The pointer offsets are relative to
1068 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1069 *
1070 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1071 *
1072 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1073 * gen7+. However, on gen6 and earlier, they're all lumpped together in
1074 * one CC_STATE_POINTERS packet so we have to emit that here.
1075 */
1076 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1077 cc.BLEND_STATEChange = true;
1078 cc.COLOR_CALC_STATEChange = true;
1079 cc.DEPTH_STENCIL_STATEChange = true;
1080 cc.PointertoBLEND_STATE = blend_state_offset;
1081 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1082 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1083 }
1084 #else
1085 (void)blend_state_offset;
1086 (void)color_calc_state_offset;
1087 (void)depth_stencil_state_offset;
1088 #endif
1089
1090 blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
1091 #if GEN_GEN >= 7
1092 blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
1093 blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
1094 #endif
1095 blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
1096 blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
1097
1098 if (params->wm_prog_data)
1099 blorp_emit_surface_states(batch, params);
1100
1101 if (params->src.addr.buffer)
1102 blorp_emit_sampler_state(batch, params);
1103
1104 blorp_emit_3dstate_multisample(batch, params->dst.surf.samples);
1105
1106 blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
1107 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1108 }
1109
1110 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1111 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1112 *
1113 * [DevSNB] A pipeline flush must be programmed prior to a
1114 * 3DSTATE_VS command that causes the VS Function Enable to
1115 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1116 * command with CS stall bit set and a post sync operation.
1117 *
1118 * We've already done one at the start of the BLORP operation.
1119 */
1120 blorp_emit(batch, GENX(3DSTATE_VS), vs);
1121 #if GEN_GEN >= 7
1122 blorp_emit(batch, GENX(3DSTATE_HS), hs);
1123 blorp_emit(batch, GENX(3DSTATE_TE), te);
1124 blorp_emit(batch, GENX(3DSTATE_DS), DS);
1125 blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
1126 #endif
1127 blorp_emit(batch, GENX(3DSTATE_GS), gs);
1128
1129 blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
1130 clip.PerspectiveDivideDisable = true;
1131 }
1132
1133 blorp_emit_sf_config(batch, params);
1134 blorp_emit_ps_config(batch, params);
1135
1136 blorp_emit_viewport_state(batch, params);
1137
1138 if (params->depth.addr.buffer) {
1139 blorp_emit_depth_stencil_config(batch, params);
1140 } else {
1141 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
1142 db.SurfaceType = SURFTYPE_NULL;
1143 db.SurfaceFormat = D32_FLOAT;
1144 }
1145 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1146 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
1147 }
1148
1149 /* 3DSTATE_CLEAR_PARAMS
1150 *
1151 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1152 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1153 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1154 */
1155 blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1156 clear.DepthClearValueValid = true;
1157 clear.DepthClearValue = params->depth.clear_color.u32[0];
1158 }
1159
1160 blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1161 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1162 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1163 }
1164
1165 blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
1166 prim.VertexAccessType = SEQUENTIAL;
1167 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1168 prim.VertexCountPerInstance = 3;
1169 prim.InstanceCount = params->num_layers;
1170 }
1171 }