i965: Move blorp into src/intel/blorp
[mesa.git] / src / intel / blorp / blorp_genX_exec.h
1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "blorp_priv.h"
25 #include "brw_device_info.h"
26 #include "intel_aub.h"
27
28 /**
29 * This file provides the blorp pipeline setup and execution functionality.
30 * It defines the following function:
31 *
32 * static void
33 * blorp_exec(struct blorp_context *blorp, void *batch_data,
34 * const struct blorp_params *params);
35 *
36 * It is the job of whoever includes this header to wrap this in something
37 * to get an externally visible symbol.
38 *
39 * In order for the blorp_exec function to work, the driver must provide
40 * implementations of the following static helper functions.
41 */
42
/* Reserve space for n dwords in the batch and return a pointer to it.  The
 * blorp_emit()/blorp_emitn() macros in this file pack command dwords through
 * the returned pointer.
 */
static void *
blorp_emit_dwords(struct blorp_batch *batch, unsigned n);

/* Called by __gen_combine_address() for every address field whose buffer is
 * non-NULL; the returned 64-bit value is what gets packed at `location`.
 * NOTE(review): presumably records a relocation for address + delta --
 * confirm against the driver implementation.
 */
static uint64_t
blorp_emit_reloc(struct blorp_batch *batch,
                 void *location, struct blorp_address address, uint32_t delta);

/* Allocate `size` bytes of dynamic state with the requested alignment.
 * Returns a pointer the caller packs the state into and stores in *offset
 * the value callers in this file place in the *_STATE_POINTERS commands.
 */
static void *
blorp_alloc_dynamic_state(struct blorp_batch *batch,
                          enum aub_state_struct_type type,
                          uint32_t size,
                          uint32_t alignment,
                          uint32_t *offset);
/* Allocate `size` bytes of vertex data storage.  Returns a pointer the
 * caller copies vertex data into and fills *addr, which callers in this
 * file use as the vertex buffer starting address.
 */
static void *
blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
                          struct blorp_address *addr);

/* Allocate a binding table with num_entries entries plus one surface state
 * of state_size bytes per entry.  Outputs, as used by
 * blorp_emit_surface_states(): *bt_offset is programmed as the PS binding
 * table pointer, (*bt_map)[i] is passed on as the offset of surface state i,
 * and surface_maps[i] is the pointer surface state i is written through.
 */
static void
blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
                          unsigned state_size, unsigned state_alignment,
                          uint32_t *bt_offset, uint32_t **bt_map,
                          void **surface_maps);
/* Called for the main and auxiliary surface addresses of each surface
 * state, with ss_offset being the byte offset of the address dword.
 * NOTE(review): presumably emits a relocation at ss_offset -- confirm
 * against the driver implementation.
 */
static void
blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                    struct blorp_address address, uint32_t delta);

/* Emit the URB configuration; vs_entry_size is the VS URB entry size in the
 * 64-byte units computed by gen7_blorp_get_vs_entry_size().
 */
static void
blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size);
/* NOTE(review): not called in the part of the file visible here; by its name
 * it emits 3DSTATE_MULTISAMPLE for the given sample count -- confirm.
 */
static void
blorp_emit_3dstate_multisample(struct blorp_batch *batch, unsigned samples);
73
74 /***** BEGIN blorp_exec implementation ******/
75
76 #include "genxml/gen_macros.h"
77
78 #define __gen_address_type struct blorp_address
79 #define __gen_user_data struct blorp_batch
80
81 static uint64_t
82 __gen_combine_address(struct blorp_batch *batch, void *location,
83 struct blorp_address address, uint32_t delta)
84 {
85 if (address.buffer == NULL) {
86 return address.offset + delta;
87 } else {
88 return blorp_emit_reloc(batch, location, address, delta);
89 }
90 }
91
92 #include "genxml/genX_pack.h"
93
/* Token-pasting helpers mapping a genxml command name to its generated
 * length constant, length bias, header initializer and pack function.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* blorp_emit(batch, cmd, name) { name.Field = ...; }
 *
 * Declares `struct cmd name` initialized with the command header and
 * reserves the command's dwords in the batch.  The attached statement runs
 * exactly once (the loop condition is "_dst != NULL"); afterwards the loop
 * increment packs `name` into the reserved dwords and clears _dst, which
 * terminates the loop.  If blorp_emit_dwords() returns NULL the body is
 * skipped and nothing is packed.
 */
#define blorp_emit(batch, cmd, name)                                 \
   for (struct cmd name = { _blorp_cmd_header(cmd) },                \
        *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd));    \
        __builtin_expect(_dst != NULL, 1);                           \
        _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name),            \
        _dst = NULL)

/* blorp_emitn(batch, cmd, n)
 *
 * Emits the header of a variable-length command occupying n dwords in total
 * and evaluates to a pointer to dword 1, where the caller packs the
 * command's array payload.
 *
 * `n` is evaluated exactly once and as a whole expression, so arguments
 * with side effects or low-precedence operators (e.g. "c ? a : b") yield
 * the intended DWordLength.
 */
#define blorp_emitn(batch, cmd, n) ({                                \
      const unsigned _blorp_n = (n);                                 \
      uint32_t *_dw = blorp_emit_dwords(batch, _blorp_n);            \
      struct cmd template = {                                        \
         _blorp_cmd_header(cmd),                                     \
         .DWordLength = _blorp_n - _blorp_cmd_length_bias(cmd),      \
      };                                                             \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);                   \
      _dw + 1; /* Array starts at dw[1] */                           \
   })
115
116 /* Once vertex fetcher has written full VUE entries with complete
117 * header the space requirement is as follows per vertex (in bytes):
118 *
119 * Header Position Program constants
120 * +--------+------------+-------------------+
121 * | 16 | 16 | n x 16 |
122 * +--------+------------+-------------------+
123 *
124 * where 'n' stands for number of varying inputs expressed as vec4s.
125 *
126 * The URB size is in turn expressed in 64 bytes (512 bits).
127 */
128 static inline unsigned
129 gen7_blorp_get_vs_entry_size(const struct blorp_params *params)
130 {
131 const unsigned num_varyings =
132 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
133 const unsigned total_needed = 16 + 16 + num_varyings * 16;
134
135 return DIV_ROUND_UP(total_needed, 64);
136 }
137
/* 3DSTATE_URB
 * 3DSTATE_URB_VS
 * 3DSTATE_URB_HS
 * 3DSTATE_URB_DS
 * 3DSTATE_URB_GS
 *
 * Assign the entire URB to the VS.  Even though the VS is disabled, URB
 * space is still needed because the clipper loads the VUEs from the URB.
 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
 * Dword 1.15:0 "VS Number of URB Entries":
 *     This field is always used (even if VS Function Enable is DISABLED).
 *
 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
 * safely ignore it because this batch contains only one draw call.
 *     Because of URB corruption caused by allocating a previous GS unit
 *     URB entry to the VS unit, software is required to send a "GS NULL
 *     Fence" (Send URB fence with VS URB size == 1 and GS URB size == 0)
 *     plus a dummy DRAW call before any case where VS will be taking over
 *     GS URB space.
 *
 * If 3DSTATE_URB_VS is emitted, then the others must be emitted as well.
 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
 *
 *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
 *     programmed in order for the programming of this state to be
 *     valid.
 *
 * The actual packets are emitted by the driver's blorp_emit_urb_config();
 * this wrapper only supplies the VS entry size required by `params`.
 */
static void
emit_urb_config(struct blorp_batch *batch,
                const struct blorp_params *params)
{
   const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);

   blorp_emit_urb_config(batch, vs_entry_size);
}
171
172 static void
173 blorp_emit_vertex_data(struct blorp_batch *batch,
174 const struct blorp_params *params,
175 struct blorp_address *addr,
176 uint32_t *size)
177 {
178 const float vertices[] = {
179 /* v0 */ (float)params->x0, (float)params->y1,
180 /* v1 */ (float)params->x1, (float)params->y1,
181 /* v2 */ (float)params->x0, (float)params->y0,
182 };
183
184 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
185 memcpy(data, vertices, sizeof(vertices));
186 *size = sizeof(vertices);
187 }
188
189 static void
190 blorp_emit_input_varying_data(struct blorp_batch *batch,
191 const struct blorp_params *params,
192 struct blorp_address *addr,
193 uint32_t *size)
194 {
195 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
196 const unsigned max_num_varyings =
197 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
198 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
199
200 *size = num_varyings * vec4_size_in_bytes;
201
202 const float *const inputs_src = (const float *)&params->wm_inputs;
203 float *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
204
205 /* Walk over the attribute slots, determine if the attribute is used by
206 * the program and when necessary copy the values from the input storage to
207 * the vertex data buffer.
208 */
209 for (unsigned i = 0; i < max_num_varyings; i++) {
210 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
211
212 if (!(params->wm_prog_data->inputs_read & (1ull << attr)))
213 continue;
214
215 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
216
217 inputs += 4;
218 }
219 }
220
221 static void
222 blorp_emit_vertex_buffers(struct blorp_batch *batch,
223 const struct blorp_params *params)
224 {
225 struct GENX(VERTEX_BUFFER_STATE) vb[2];
226 memset(vb, 0, sizeof(vb));
227
228 unsigned num_buffers = 1;
229
230 uint32_t size;
231 blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
232 vb[0].VertexBufferIndex = 0;
233 vb[0].BufferPitch = 2 * sizeof(float);
234 vb[0].VertexBufferMOCS = batch->blorp->mocs.vb;
235 #if GEN_GEN >= 7
236 vb[0].AddressModifyEnable = true;
237 #endif
238 #if GEN_GEN >= 8
239 vb[0].BufferSize = size;
240 #else
241 vb[0].BufferAccessType = VERTEXDATA;
242 vb[0].EndAddress = vb[0].BufferStartingAddress;
243 vb[0].EndAddress.offset += size - 1;
244 #endif
245
246 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
247 blorp_emit_input_varying_data(batch, params,
248 &vb[1].BufferStartingAddress, &size);
249 vb[1].VertexBufferIndex = 1;
250 vb[1].BufferPitch = 0;
251 vb[1].VertexBufferMOCS = batch->blorp->mocs.vb;
252 #if GEN_GEN >= 7
253 vb[1].AddressModifyEnable = true;
254 #endif
255 #if GEN_GEN >= 8
256 vb[1].BufferSize = size;
257 #else
258 vb[1].BufferAccessType = INSTANCEDATA;
259 vb[1].EndAddress = vb[1].BufferStartingAddress;
260 vb[1].EndAddress.offset += size - 1;
261 #endif
262 num_buffers++;
263 }
264
265 const unsigned num_dwords =
266 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
267 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
268
269 for (unsigned i = 0; i < num_buffers; i++) {
270 GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
271 dw += GENX(VERTEX_BUFFER_STATE_length);
272 }
273 }
274
275 static void
276 blorp_emit_vertex_elements(struct blorp_batch *batch,
277 const struct blorp_params *params)
278 {
279 const unsigned num_varyings =
280 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
281 const unsigned num_elements = 2 + num_varyings;
282
283 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
284 memset(ve, 0, num_elements * sizeof(*ve));
285
286 /* Setup VBO for the rectangle primitive..
287 *
288 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
289 * vertices. The vertices reside in screen space with DirectX
290 * coordinates (that is, (0, 0) is the upper left corner).
291 *
292 * v2 ------ implied
293 * | |
294 * | |
295 * v0 ----- v1
296 *
297 * Since the VS is disabled, the clipper loads each VUE directly from
298 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
299 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
300 * dw0: Reserved, MBZ.
301 * dw1: Render Target Array Index. The HiZ op does not use indexed
302 * vertices, so set the dword to 0.
303 * dw2: Viewport Index. The HiZ op disables viewport mapping and
304 * scissoring, so set the dword to 0.
305 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
306 * so set the dword to 0.
307 * dw4: Vertex Position X.
308 * dw5: Vertex Position Y.
309 * dw6: Vertex Position Z.
310 * dw7: Vertex Position W.
311 *
312 * dw8: Flat vertex input 0
313 * dw9: Flat vertex input 1
314 * ...
315 * dwn: Flat vertex input n - 8
316 *
317 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
318 * "Vertex URB Entry (VUE) Formats".
319 *
320 * Only vertex position X and Y are going to be variable, Z is fixed to
321 * zero and W to one. Header words dw0-3 are all zero. There is no need to
322 * include the fixed values in the vertex buffer. Vertex fetcher can be
323 * instructed to fill vertex elements with constant values of one and zero
324 * instead of reading them from the buffer.
325 * Flat inputs are program constants that are not interpolated. Moreover
326 * their values will be the same between vertices.
327 *
328 * See the vertex element setup below.
329 */
330 ve[0].VertexBufferIndex = 0;
331 ve[0].Valid = true;
332 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
333 ve[0].SourceElementOffset = 0;
334 ve[0].Component0Control = VFCOMP_STORE_0;
335 ve[0].Component1Control = VFCOMP_STORE_0;
336 ve[0].Component2Control = VFCOMP_STORE_0;
337 ve[0].Component3Control = VFCOMP_STORE_0;
338
339 ve[1].VertexBufferIndex = 0;
340 ve[1].Valid = true;
341 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
342 ve[1].SourceElementOffset = 0;
343 ve[1].Component0Control = VFCOMP_STORE_SRC;
344 ve[1].Component1Control = VFCOMP_STORE_SRC;
345 ve[1].Component2Control = VFCOMP_STORE_0;
346 ve[1].Component3Control = VFCOMP_STORE_1_FP;
347
348 for (unsigned i = 0; i < num_varyings; ++i) {
349 ve[i + 2].VertexBufferIndex = 1;
350 ve[i + 2].Valid = true;
351 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
352 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
353 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
354 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
355 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
356 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
357 }
358
359 const unsigned num_dwords =
360 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
361 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
362
363 for (unsigned i = 0; i < num_elements; i++) {
364 GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);
365 dw += GENX(VERTEX_ELEMENT_STATE_length);
366 }
367
368 #if GEN_GEN >= 8
369 blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
370
371 for (unsigned i = 0; i < num_elements; i++) {
372 blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
373 vf.VertexElementIndex = i;
374 vf.InstancingEnable = false;
375 }
376 }
377
378 blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
379 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
380 }
381 #endif
382 }
383
384 static void
385 blorp_emit_sf_config(struct blorp_batch *batch,
386 const struct blorp_params *params)
387 {
388 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
389
390 /* 3DSTATE_SF
391 *
392 * Disable ViewportTransformEnable (dw2.1)
393 *
394 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
395 * Primitives Overview":
396 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
397 * use of screen- space coordinates).
398 *
399 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
400 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
401 *
402 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
403 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
404 * SOLID: Any triangle or rectangle object found to be front-facing
405 * is rendered as a solid object. This setting is required when
406 * (rendering rectangle (RECTLIST) objects.
407 */
408
409 #if GEN_GEN >= 8
410
411 blorp_emit(batch, GENX(3DSTATE_SF), sf);
412
413 blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
414 raster.CullMode = CULLMODE_NONE;
415 }
416
417 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
418 sbe.VertexURBEntryReadOffset = 1;
419 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
420 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
421 sbe.ForceVertexURBEntryReadLength = true;
422 sbe.ForceVertexURBEntryReadOffset = true;
423 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
424
425 #if GEN_GEN >= 9
426 for (unsigned i = 0; i < 32; i++)
427 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
428 #endif
429 }
430
431 #elif GEN_GEN >= 7
432
433 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
434 sf.FrontFaceFillMode = FILL_MODE_SOLID;
435 sf.BackFaceFillMode = FILL_MODE_SOLID;
436
437 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
438 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
439
440 #if GEN_GEN == 7
441 sf.DepthBufferSurfaceFormat = params->depth_format;
442 #endif
443 }
444
445 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
446 sbe.VertexURBEntryReadOffset = 1;
447 if (prog_data) {
448 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
449 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
450 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
451 } else {
452 sbe.NumberofSFOutputAttributes = 0;
453 sbe.VertexURBEntryReadLength = 1;
454 }
455 }
456
457 #else /* GEN_GEN <= 6 */
458
459 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
460 sf.FrontFaceFillMode = FILL_MODE_SOLID;
461 sf.BackFaceFillMode = FILL_MODE_SOLID;
462
463 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
464 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
465
466 sf.VertexURBEntryReadOffset = 1;
467 if (prog_data) {
468 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
469 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
470 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
471 } else {
472 sf.NumberofSFOutputAttributes = 0;
473 sf.VertexURBEntryReadLength = 1;
474 }
475 }
476
477 #endif /* GEN_GEN */
478 }
479
480 static void
481 blorp_emit_ps_config(struct blorp_batch *batch,
482 const struct blorp_params *params)
483 {
484 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
485
486 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
487 * nonzero to prevent the GPU from hanging. While the documentation doesn't
488 * mention this explicitly, it notes that the valid range for the field is
489 * [1,39] = [2,40] threads, which excludes zero.
490 *
491 * To be safe (and to minimize extraneous code) we go ahead and fully
492 * configure the WM state whether or not there is a WM program.
493 */
494
495 #if GEN_GEN >= 8
496
497 blorp_emit(batch, GENX(3DSTATE_WM), wm);
498
499 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
500 if (params->src.addr.buffer) {
501 ps.SamplerCount = 1; /* Up to 4 samplers */
502 ps.BindingTableEntryCount = 2;
503 } else {
504 ps.BindingTableEntryCount = 1;
505 }
506
507 ps.DispatchGRFStartRegisterForConstantSetupData0 =
508 prog_data->first_curbe_grf_0;
509 ps.DispatchGRFStartRegisterForConstantSetupData2 =
510 prog_data->first_curbe_grf_2;
511
512 ps._8PixelDispatchEnable = prog_data->dispatch_8;
513 ps._16PixelDispatchEnable = prog_data->dispatch_16;
514
515 ps.KernelStartPointer0 = params->wm_prog_kernel;
516 ps.KernelStartPointer2 =
517 params->wm_prog_kernel + prog_data->ksp_offset_2;
518
519 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
520 * it implicitly scales for different GT levels (which have some # of
521 * PSDs).
522 *
523 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
524 */
525 if (GEN_GEN >= 9)
526 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
527 else
528 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
529
530 switch (params->fast_clear_op) {
531 case BLORP_FAST_CLEAR_OP_NONE:
532 break;
533 #if GEN_GEN >= 9
534 case BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL:
535 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
536 break;
537 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
538 ps.RenderTargetResolveType = RESOLVE_FULL;
539 break;
540 #else
541 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
542 ps.RenderTargetResolveEnable = true;
543 break;
544 #endif
545 case BLORP_FAST_CLEAR_OP_CLEAR:
546 ps.RenderTargetFastClearEnable = true;
547 break;
548 default:
549 unreachable("Invalid fast clear op");
550 }
551 }
552
553 blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
554 psx.PixelShaderValid = true;
555
556 if (params->src.addr.buffer)
557 psx.PixelShaderKillsPixel = true;
558
559 psx.AttributeEnable = prog_data->num_varying_inputs > 0;
560
561 if (prog_data && prog_data->persample_msaa_dispatch)
562 psx.PixelShaderIsPerSample = true;
563 }
564
565 #elif GEN_GEN >= 7
566
567 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
568 switch (params->hiz_op) {
569 case BLORP_HIZ_OP_DEPTH_CLEAR:
570 wm.DepthBufferClear = true;
571 break;
572 case BLORP_HIZ_OP_DEPTH_RESOLVE:
573 wm.DepthBufferResolveEnable = true;
574 break;
575 case BLORP_HIZ_OP_HIZ_RESOLVE:
576 wm.HierarchicalDepthBufferResolveEnable = true;
577 break;
578 case BLORP_HIZ_OP_NONE:
579 break;
580 default:
581 unreachable("not reached");
582 }
583
584 if (prog_data)
585 wm.ThreadDispatchEnable = true;
586
587 if (params->src.addr.buffer)
588 wm.PixelShaderKillPixel = true;
589
590 if (params->dst.surf.samples > 1) {
591 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
592 wm.MultisampleDispatchMode =
593 (prog_data && prog_data->persample_msaa_dispatch) ?
594 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
595 } else {
596 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
597 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
598 }
599 }
600
601 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
602 ps.MaximumNumberofThreads =
603 batch->blorp->isl_dev->info->max_wm_threads - 1;
604
605 #if GEN_IS_HASWELL
606 ps.SampleMask = 1;
607 #endif
608
609 if (prog_data) {
610 ps.DispatchGRFStartRegisterforConstantSetupData0 =
611 prog_data->first_curbe_grf_0;
612 ps.DispatchGRFStartRegisterforConstantSetupData2 =
613 prog_data->first_curbe_grf_2;
614
615 ps.KernelStartPointer0 = params->wm_prog_kernel;
616 ps.KernelStartPointer2 =
617 params->wm_prog_kernel + prog_data->ksp_offset_2;
618
619 ps._8PixelDispatchEnable = prog_data->dispatch_8;
620 ps._16PixelDispatchEnable = prog_data->dispatch_16;
621
622 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
623 } else {
624 /* Gen7 hardware gets angry if we don't enable at least one dispatch
625 * mode, so just enable 16-pixel dispatch if we don't have a program.
626 */
627 ps._16PixelDispatchEnable = true;
628 }
629
630 if (params->src.addr.buffer)
631 ps.SamplerCount = 1; /* Up to 4 samplers */
632
633 switch (params->fast_clear_op) {
634 case BLORP_FAST_CLEAR_OP_NONE:
635 break;
636 case BLORP_FAST_CLEAR_OP_RESOLVE_FULL:
637 ps.RenderTargetResolveEnable = true;
638 break;
639 case BLORP_FAST_CLEAR_OP_CLEAR:
640 ps.RenderTargetFastClearEnable = true;
641 break;
642 default:
643 unreachable("Invalid fast clear op");
644 }
645 }
646
647 #else /* GEN_GEN <= 6 */
648
649 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
650 wm.MaximumNumberofThreads =
651 batch->blorp->isl_dev->info->max_wm_threads - 1;
652
653 switch (params->hiz_op) {
654 case BLORP_HIZ_OP_DEPTH_CLEAR:
655 wm.DepthBufferClear = true;
656 break;
657 case BLORP_HIZ_OP_DEPTH_RESOLVE:
658 wm.DepthBufferResolveEnable = true;
659 break;
660 case BLORP_HIZ_OP_HIZ_RESOLVE:
661 wm.HierarchicalDepthBufferResolveEnable = true;
662 break;
663 case BLORP_HIZ_OP_NONE:
664 break;
665 default:
666 unreachable("not reached");
667 }
668
669 if (prog_data) {
670 wm.ThreadDispatchEnable = true;
671
672 wm.DispatchGRFStartRegisterforConstantSetupData0 =
673 prog_data->first_curbe_grf_0;
674 wm.DispatchGRFStartRegisterforConstantSetupData2 =
675 prog_data->first_curbe_grf_2;
676
677 wm.KernelStartPointer0 = params->wm_prog_kernel;
678 wm.KernelStartPointer2 =
679 params->wm_prog_kernel + prog_data->ksp_offset_2;
680
681 wm._8PixelDispatchEnable = prog_data->dispatch_8;
682 wm._16PixelDispatchEnable = prog_data->dispatch_16;
683
684 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
685 }
686
687 if (params->src.addr.buffer) {
688 wm.SamplerCount = 1; /* Up to 4 samplers */
689 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
690 }
691
692 if (params->dst.surf.samples > 1) {
693 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
694 wm.MultisampleDispatchMode =
695 (prog_data && prog_data->persample_msaa_dispatch) ?
696 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
697 } else {
698 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
699 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
700 }
701 }
702
703 #endif /* GEN_GEN */
704 }
705
706
707 static void
708 blorp_emit_depth_stencil_config(struct blorp_batch *batch,
709 const struct blorp_params *params)
710 {
711 #if GEN_GEN >= 7
712 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
713 #else
714 const uint32_t mocs = 0;
715 #endif
716
717 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
718 switch (params->depth.surf.dim) {
719 case ISL_SURF_DIM_1D:
720 db.SurfaceType = SURFTYPE_1D;
721 break;
722 case ISL_SURF_DIM_2D:
723 db.SurfaceType = SURFTYPE_2D;
724 break;
725 case ISL_SURF_DIM_3D:
726 db.SurfaceType = SURFTYPE_3D;
727 break;
728 }
729
730 db.SurfaceFormat = params->depth_format;
731
732 #if GEN_GEN >= 7
733 db.DepthWriteEnable = true;
734 #endif
735
736 #if GEN_GEN <= 6
737 db.TiledSurface = true;
738 db.TileWalk = TILEWALK_YMAJOR;
739 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
740 db.SeparateStencilBufferEnable = true;
741 #endif
742
743 db.HierarchicalDepthBufferEnable = true;
744
745 db.Width = params->depth.surf.logical_level0_px.width - 1;
746 db.Height = params->depth.surf.logical_level0_px.height - 1;
747 db.RenderTargetViewExtent = db.Depth =
748 MAX2(params->depth.surf.logical_level0_px.depth,
749 params->depth.surf.logical_level0_px.array_len) - 1;
750
751 db.LOD = params->depth.view.base_level;
752 db.MinimumArrayElement = params->depth.view.base_array_layer;
753
754 db.SurfacePitch = params->depth.surf.row_pitch - 1;
755 db.SurfaceBaseAddress = params->depth.addr;
756 db.DepthBufferMOCS = mocs;
757 }
758
759 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
760 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
761 hiz.SurfaceBaseAddress = params->depth.aux_addr;
762 hiz.HierarchicalDepthBufferMOCS = mocs;
763 }
764
765 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
766 }
767
768 static uint32_t
769 blorp_emit_blend_state(struct blorp_batch *batch,
770 const struct blorp_params *params)
771 {
772 struct GENX(BLEND_STATE) blend;
773 memset(&blend, 0, sizeof(blend));
774
775 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
776 blend.Entry[i].PreBlendColorClampEnable = true;
777 blend.Entry[i].PostBlendColorClampEnable = true;
778 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
779
780 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
781 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
782 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
783 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
784 }
785
786 uint32_t offset;
787 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE,
788 GENX(BLEND_STATE_length) * 4,
789 64, &offset);
790 GENX(BLEND_STATE_pack)(NULL, state, &blend);
791
792 #if GEN_GEN >= 7
793 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
794 sp.BlendStatePointer = offset;
795 #if GEN_GEN >= 8
796 sp.BlendStatePointerValid = true;
797 #endif
798 }
799 #endif
800
801 #if GEN_GEN >= 8
802 blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
803 ps_blend.HasWriteableRT = true;
804 }
805 #endif
806
807 return offset;
808 }
809
810 static uint32_t
811 blorp_emit_color_calc_state(struct blorp_batch *batch,
812 const struct blorp_params *params)
813 {
814 uint32_t offset;
815 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE,
816 GENX(COLOR_CALC_STATE_length) * 4,
817 64, &offset);
818 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
819
820 #if GEN_GEN >= 7
821 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
822 sp.ColorCalcStatePointer = offset;
823 #if GEN_GEN >= 8
824 sp.ColorCalcStatePointerValid = true;
825 #endif
826 }
827 #endif
828
829 return offset;
830 }
831
832 static uint32_t
833 blorp_emit_depth_stencil_state(struct blorp_batch *batch,
834 const struct blorp_params *params)
835 {
836 #if GEN_GEN >= 8
837
838 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
839 blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
840 return 0;
841
842 #else /* GEN_GEN <= 7 */
843
844 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
845 * - 7.5.3.1 Depth Buffer Clear
846 * - 7.5.3.2 Depth Buffer Resolve
847 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
848 */
849 struct GENX(DEPTH_STENCIL_STATE) ds = {
850 .DepthBufferWriteEnable = true,
851 };
852
853 if (params->hiz_op == BLORP_HIZ_OP_DEPTH_RESOLVE) {
854 ds.DepthTestEnable = true;
855 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
856 }
857
858 uint32_t offset;
859 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE,
860 GENX(DEPTH_STENCIL_STATE_length) * 4,
861 64, &offset);
862 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
863
864 #if GEN_GEN >= 7
865 blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
866 sp.PointertoDEPTH_STENCIL_STATE = offset;
867 }
868 #endif
869
870 return offset;
871
872 #endif /* GEN_GEN */
873 }
874
/* Per-generation layout facts about RENDER_SURFACE_STATE. */
struct surface_state_info {
   unsigned num_dwords;    /* Length of the surface state in dwords */
   unsigned ss_align;      /* Required alignment of RENDER_SURFACE_STATE in bytes */
   unsigned reloc_dw;      /* Dword holding the surface base address */
   unsigned aux_reloc_dw;  /* Dword holding the auxiliary surface address */
};

/* Indexed by hardware generation (GEN_GEN). */
static const struct surface_state_info surface_state_infos[] = {
   [6] = { .num_dwords = 6,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 0 },
   [7] = { .num_dwords = 8,  .ss_align = 32, .reloc_dw = 1, .aux_reloc_dw = 6 },
   [8] = { .num_dwords = 13, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
   [9] = { .num_dwords = 16, .ss_align = 64, .reloc_dw = 8, .aux_reloc_dw = 10 },
};
888
889 static void
890 blorp_emit_surface_state(struct blorp_batch *batch,
891 const struct brw_blorp_surface_info *surface,
892 uint32_t *state, uint32_t state_offset,
893 bool is_render_target)
894 {
895 const struct surface_state_info ss_info = surface_state_infos[GEN_GEN];
896
897 struct isl_surf surf = surface->surf;
898
899 if (surf.dim == ISL_SURF_DIM_1D &&
900 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
901 assert(surf.logical_level0_px.height == 1);
902 surf.dim = ISL_SURF_DIM_2D;
903 }
904
905 /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
906 enum isl_aux_usage aux_usage = surface->aux_usage;
907 if (aux_usage == ISL_AUX_USAGE_HIZ)
908 aux_usage = ISL_AUX_USAGE_NONE;
909
910 const uint32_t mocs =
911 is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
912
913 isl_surf_fill_state(batch->blorp->isl_dev, state,
914 .surf = &surf, .view = &surface->view,
915 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
916 .mocs = mocs, .clear_color = surface->clear_color,
917 .x_offset_sa = surface->tile_x_sa,
918 .y_offset_sa = surface->tile_y_sa);
919
920 blorp_surface_reloc(batch, state_offset + ss_info.reloc_dw * 4,
921 surface->addr, 0);
922
923 if (aux_usage != ISL_AUX_USAGE_NONE) {
924 /* On gen7 and prior, the bottom 12 bits of the MCS base address are
925 * used to store other information. This should be ok, however, because
926 * surface buffer addresses are always 4K page alinged.
927 */
928 assert((surface->aux_addr.offset & 0xfff) == 0);
929 blorp_surface_reloc(batch, state_offset + ss_info.aux_reloc_dw * 4,
930 surface->aux_addr, state[ss_info.aux_reloc_dw]);
931 }
932 }
933
934 static void
935 blorp_emit_surface_states(struct blorp_batch *batch,
936 const struct blorp_params *params)
937 {
938 uint32_t bind_offset, *bind_map;
939 void *surface_maps[2];
940
941 const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
942 const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32;
943
944 unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
945 blorp_alloc_binding_table(batch, num_surfaces, ss_size, ss_align,
946 &bind_offset, &bind_map, surface_maps);
947
948 blorp_emit_surface_state(batch, &params->dst,
949 surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
950 bind_map[BLORP_RENDERBUFFER_BT_INDEX], true);
951 if (params->src.addr.buffer) {
952 blorp_emit_surface_state(batch, &params->src,
953 surface_maps[BLORP_TEXTURE_BT_INDEX],
954 bind_map[BLORP_TEXTURE_BT_INDEX], false);
955 }
956
957 #if GEN_GEN >= 7
958 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
959 bt.PointertoPSBindingTable = bind_offset;
960 }
961 #else
962 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
963 bt.PSBindingTableChange = true;
964 bt.PointertoPSBindingTable = bind_offset;
965 }
966 #endif
967 }
968
969 static void
970 blorp_emit_sampler_state(struct blorp_batch *batch,
971 const struct blorp_params *params)
972 {
973 struct GENX(SAMPLER_STATE) sampler = {
974 .MipModeFilter = MIPFILTER_NONE,
975 .MagModeFilter = MAPFILTER_LINEAR,
976 .MinModeFilter = MAPFILTER_LINEAR,
977 .MinLOD = 0,
978 .MaxLOD = 0,
979 .TCXAddressControlMode = TCM_CLAMP,
980 .TCYAddressControlMode = TCM_CLAMP,
981 .TCZAddressControlMode = TCM_CLAMP,
982 .MaximumAnisotropy = RATIO21,
983 .RAddressMinFilterRoundingEnable = true,
984 .RAddressMagFilterRoundingEnable = true,
985 .VAddressMinFilterRoundingEnable = true,
986 .VAddressMagFilterRoundingEnable = true,
987 .UAddressMinFilterRoundingEnable = true,
988 .UAddressMagFilterRoundingEnable = true,
989 .NonnormalizedCoordinateEnable = true,
990 };
991
992 uint32_t offset;
993 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE,
994 GENX(SAMPLER_STATE_length) * 4,
995 32, &offset);
996 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
997
998 #if GEN_GEN >= 7
999 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1000 ssp.PointertoPSSamplerState = offset;
1001 }
1002 #else
1003 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1004 ssp.VSSamplerStateChange = true;
1005 ssp.GSSamplerStateChange = true;
1006 ssp.PSSamplerStateChange = true;
1007 ssp.PointertoPSSamplerState = offset;
1008 }
1009 #endif
1010 }
1011
1012 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1013 static void
1014 blorp_emit_viewport_state(struct blorp_batch *batch,
1015 const struct blorp_params *params)
1016 {
1017 uint32_t cc_vp_offset;
1018
1019 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE,
1020 GENX(CC_VIEWPORT_length) * 4, 32,
1021 &cc_vp_offset);
1022
1023 GENX(CC_VIEWPORT_pack)(batch, state,
1024 &(struct GENX(CC_VIEWPORT)) {
1025 .MinimumDepth = 0.0,
1026 .MaximumDepth = 1.0,
1027 });
1028
1029 #if GEN_GEN >= 7
1030 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1031 vsp.CCViewportPointer = cc_vp_offset;
1032 }
1033 #else
1034 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1035 vsp.CCViewportStateChange = true;
1036 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1037 }
1038 #endif
1039 }
1040
1041
1042 /**
1043 * \brief Execute a blit or render pass operation.
1044 *
1045 * To execute the operation, this function manually constructs and emits a
1046 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1047 * constructing and after emitting the batch.
1048 *
1049 * This function alters no GL state.
1050 */
1051 static void
1052 blorp_exec(struct blorp_batch *batch, const struct blorp_params *params)
1053 {
1054 uint32_t blend_state_offset = 0;
1055 uint32_t color_calc_state_offset = 0;
1056 uint32_t depth_stencil_state_offset;
1057
1058 blorp_emit_vertex_buffers(batch, params);
1059 blorp_emit_vertex_elements(batch, params);
1060
1061 emit_urb_config(batch, params);
1062
1063 if (params->wm_prog_data) {
1064 blend_state_offset = blorp_emit_blend_state(batch, params);
1065 color_calc_state_offset = blorp_emit_color_calc_state(batch, params);
1066 }
1067 depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);
1068
1069 #if GEN_GEN <= 6
1070 /* 3DSTATE_CC_STATE_POINTERS
1071 *
1072 * The pointer offsets are relative to
1073 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1074 *
1075 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1076 *
1077 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1078 * gen7+. However, on gen6 and earlier, they're all lumpped together in
1079 * one CC_STATE_POINTERS packet so we have to emit that here.
1080 */
1081 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1082 cc.BLEND_STATEChange = true;
1083 cc.COLOR_CALC_STATEChange = true;
1084 cc.DEPTH_STENCIL_STATEChange = true;
1085 cc.PointertoBLEND_STATE = blend_state_offset;
1086 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1087 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1088 }
1089 #else
1090 (void)blend_state_offset;
1091 (void)color_calc_state_offset;
1092 (void)depth_stencil_state_offset;
1093 #endif
1094
1095 blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
1096 #if GEN_GEN >= 7
1097 blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
1098 blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
1099 #endif
1100 blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
1101 blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
1102
1103 if (params->wm_prog_data)
1104 blorp_emit_surface_states(batch, params);
1105
1106 if (params->src.addr.buffer)
1107 blorp_emit_sampler_state(batch, params);
1108
1109 blorp_emit_3dstate_multisample(batch, params->dst.surf.samples);
1110
1111 blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
1112 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1113 }
1114
1115 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1116 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1117 *
1118 * [DevSNB] A pipeline flush must be programmed prior to a
1119 * 3DSTATE_VS command that causes the VS Function Enable to
1120 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1121 * command with CS stall bit set and a post sync operation.
1122 *
1123 * We've already done one at the start of the BLORP operation.
1124 */
1125 blorp_emit(batch, GENX(3DSTATE_VS), vs);
1126 #if GEN_GEN >= 7
1127 blorp_emit(batch, GENX(3DSTATE_HS), hs);
1128 blorp_emit(batch, GENX(3DSTATE_TE), te);
1129 blorp_emit(batch, GENX(3DSTATE_DS), DS);
1130 blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
1131 #endif
1132 blorp_emit(batch, GENX(3DSTATE_GS), gs);
1133
1134 blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
1135 clip.PerspectiveDivideDisable = true;
1136 }
1137
1138 blorp_emit_sf_config(batch, params);
1139 blorp_emit_ps_config(batch, params);
1140
1141 blorp_emit_viewport_state(batch, params);
1142
1143 if (params->depth.addr.buffer) {
1144 blorp_emit_depth_stencil_config(batch, params);
1145 } else {
1146 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
1147 db.SurfaceType = SURFTYPE_NULL;
1148 db.SurfaceFormat = D32_FLOAT;
1149 }
1150 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1151 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
1152 }
1153
1154 /* 3DSTATE_CLEAR_PARAMS
1155 *
1156 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1157 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1158 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1159 */
1160 blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1161 clear.DepthClearValueValid = true;
1162 clear.DepthClearValue = params->depth.clear_color.u32[0];
1163 }
1164
1165 blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1166 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1167 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1168 }
1169
1170 blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
1171 prim.VertexAccessType = SEQUENTIAL;
1172 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1173 prim.VertexCountPerInstance = 3;
1174 prim.InstanceCount = params->num_layers;
1175 }
1176 }