i965/blorp/genX: Move emit_urb_config into another helper
[mesa.git] / src / mesa / drivers / dri / i965 / genX_blorp_exec.c
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31
32 #include "blorp_priv.h"
33
34 #include "genxml/gen_macros.h"
35
36 static void *
37 blorp_emit_dwords(struct brw_context *brw, unsigned n)
38 {
39 intel_batchbuffer_begin(brw, n, RENDER_RING);
40 uint32_t *map = brw->batch.map_next;
41 brw->batch.map_next += n;
42 intel_batchbuffer_advance(brw);
43 return map;
44 }
45
46 struct blorp_address {
47 drm_intel_bo *buffer;
48 uint32_t read_domains;
49 uint32_t write_domain;
50 uint32_t offset;
51 };
52
53 static uint64_t
54 blorp_emit_reloc(struct brw_context *brw, void *location,
55 struct blorp_address address, uint32_t delta)
56 {
57 uint32_t offset = (char *)location - (char *)brw->batch.map;
58 if (brw->gen >= 8) {
59 return intel_batchbuffer_reloc64(brw, address.buffer, offset,
60 address.read_domains,
61 address.write_domain,
62 address.offset + delta);
63 } else {
64 return intel_batchbuffer_reloc(brw, address.buffer, offset,
65 address.read_domains,
66 address.write_domain,
67 address.offset + delta);
68 }
69 }
70
71 static void *
72 blorp_alloc_dynamic_state(struct blorp_context *blorp,
73 enum aub_state_struct_type type,
74 uint32_t size,
75 uint32_t alignment,
76 uint32_t *offset)
77 {
78 struct brw_context *brw = blorp->driver_ctx;
79 return brw_state_batch(brw, type, size, alignment, offset);
80 }
81
82 static void *
83 blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size,
84 struct blorp_address *addr)
85 {
86 struct brw_context *brw = blorp->driver_ctx;
87
88 uint32_t offset;
89 void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
90 size, 32, &offset);
91
92 *addr = (struct blorp_address) {
93 .buffer = brw->batch.bo,
94 .read_domains = I915_GEM_DOMAIN_VERTEX,
95 .write_domain = 0,
96 .offset = offset,
97 };
98
99 return data;
100 }
101
/* Program the URB so that VS entries are at least vs_entry_size units.
 *
 * On gen7+ the upload is skipped when neither BRW_NEW_CONTEXT nor
 * BRW_NEW_URB_SIZE is pending and the current VS entry size is already
 * large enough; otherwise BRW_NEW_URB_SIZE is flagged and the URB is
 * re-uploaded.  On gen6 the URB is always uploaded.
 */
static void
blorp_emit_urb_config(struct brw_context *brw, unsigned vs_entry_size)
{
#if GEN_GEN >= 7
   const bool urb_state_pending =
      (brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) != 0;
   const bool entries_too_small = brw->urb.vsize < vs_entry_size;

   if (urb_state_pending || entries_too_small) {
      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
      gen7_upload_urb(brw, vs_entry_size, false, false);
   }
#else
   gen6_upload_urb(brw, vs_entry_size, false, 0);
#endif
}
117
/* Emit 3DSTATE_MULTISAMPLE for the given sample count using the
 * generation-appropriate driver helper (gen8 helper on gen8+, gen6 helper
 * otherwise).
 */
static void
blorp_emit_3dstate_multisample(struct brw_context *brw, unsigned samples)
{
#if GEN_GEN < 8
   gen6_emit_3dstate_multisample(brw, samples);
#else
   gen8_emit_3dstate_multisample(brw, samples);
#endif
}
127
128 #define __gen_address_type struct blorp_address
129 #define __gen_user_data struct brw_context
130
131 static uint64_t
132 __gen_combine_address(struct brw_context *brw, void *location,
133 struct blorp_address address, uint32_t delta)
134 {
135 if (address.buffer == NULL) {
136 return address.offset + delta;
137 } else {
138 return blorp_emit_reloc(brw, location, address, delta);
139 }
140 }
141
142 #include "genxml/genX_pack.h"
143
/* Token-pasting helpers that map a genxml command name onto its generated
 * length, length bias, header initializer and pack function.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.
 *
 * Usage:
 *    blorp_emit(brw, GENX(3DSTATE_FOO), foo) {
 *       foo.Field = value;
 *    }
 *
 * The for-loop runs its body exactly once: "name" is a header-initialized
 * command struct the body fills in, and the loop's increment expression
 * packs it into the batch space reserved by blorp_emit_dwords().
 */
#define blorp_emit(brw, cmd, name)                                \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
        _dst = NULL)

/* Emit the header of a variable-length command that is n dwords long in
 * total and evaluate to a pointer to the dword following the header, where
 * the caller packs the array payload.
 *
 * "n" is evaluated exactly once and may be an arbitrary expression; it is
 * captured in a local before being used for both the batch reservation and
 * the DWordLength computation.
 */
#define blorp_emitn(batch, cmd, n) ({                          \
      const unsigned _blorp_n = (n);                           \
      uint32_t *_dw = blorp_emit_dwords(batch, _blorp_n);      \
      struct cmd template = {                                  \
         _blorp_cmd_header(cmd),                               \
         .DWordLength = _blorp_n - _blorp_cmd_length_bias(cmd),\
      };                                                       \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);             \
      _dw + 1; /* Array starts at dw[1] */                     \
   })
165
166 /* Once vertex fetcher has written full VUE entries with complete
167 * header the space requirement is as follows per vertex (in bytes):
168 *
169 * Header Position Program constants
170 * +--------+------------+-------------------+
171 * | 16 | 16 | n x 16 |
172 * +--------+------------+-------------------+
173 *
174 * where 'n' stands for number of varying inputs expressed as vec4s.
175 *
176 * The URB size is in turn expressed in 64 bytes (512 bits).
177 */
178 static inline unsigned
179 gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
180 {
181 const unsigned num_varyings =
182 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
183 const unsigned total_needed = 16 + 16 + num_varyings * 16;
184
185 return DIV_ROUND_UP(total_needed, 64);
186 }
187
/* 3DSTATE_URB
 * 3DSTATE_URB_VS
 * 3DSTATE_URB_HS
 * 3DSTATE_URB_DS
 * 3DSTATE_URB_GS
 *
 * Assign the entire URB to the VS.  Even though the VS is disabled, URB
 * space is still needed because the clipper loads the VUEs from the URB.
 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
 * Dword 1.15:0 "VS Number of URB Entries":
 *     This field is always used (even if VS Function Enable is DISABLED).
 *
 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
 * safely ignore it because this batch contains only one draw call.
 *     Because of URB corruption caused by allocating a previous GS unit
 *     URB entry to the VS unit, software is required to send a "GS NULL
 *     Fence" (Send URB fence with VS URB size == 1 and GS URB size == 0)
 *     plus a dummy DRAW call before any case where VS will be taking over
 *     GS URB space.
 *
 * If 3DSTATE_URB_VS is emitted, then the others must be emitted as well.
 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
 *
 *     3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
 *     programmed in order for the programming of this state to be
 *     valid.
 */
static void
emit_urb_config(struct brw_context *brw,
                const struct brw_blorp_params *params)
{
   const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);

   blorp_emit_urb_config(brw, vs_entry_size);
}
221
222 static void
223 blorp_emit_vertex_data(struct brw_context *brw,
224 const struct brw_blorp_params *params,
225 struct blorp_address *addr,
226 uint32_t *size)
227 {
228 const float vertices[] = {
229 /* v0 */ (float)params->x0, (float)params->y1,
230 /* v1 */ (float)params->x1, (float)params->y1,
231 /* v2 */ (float)params->x0, (float)params->y0,
232 };
233
234 void *data = blorp_alloc_vertex_buffer(&brw->blorp, sizeof(vertices), addr);
235 memcpy(data, vertices, sizeof(vertices));
236 *size = sizeof(vertices);
237 }
238
239 static void
240 blorp_emit_input_varying_data(struct brw_context *brw,
241 const struct brw_blorp_params *params,
242 struct blorp_address *addr,
243 uint32_t *size)
244 {
245 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
246 const unsigned max_num_varyings =
247 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
248 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
249
250 *size = num_varyings * vec4_size_in_bytes;
251
252 const float *const inputs_src = (const float *)&params->wm_inputs;
253 float *inputs = blorp_alloc_vertex_buffer(&brw->blorp, *size, addr);
254
255 /* Walk over the attribute slots, determine if the attribute is used by
256 * the program and when necessary copy the values from the input storage to
257 * the vertex data buffer.
258 */
259 for (unsigned i = 0; i < max_num_varyings; i++) {
260 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
261
262 if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
263 continue;
264
265 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
266
267 inputs += 4;
268 }
269 }
270
271 static void
272 blorp_emit_vertex_buffers(struct brw_context *brw,
273 const struct brw_blorp_params *params)
274 {
275 struct GENX(VERTEX_BUFFER_STATE) vb[2];
276 memset(vb, 0, sizeof(vb));
277
278 unsigned num_buffers = 1;
279
280 #if GEN_GEN == 9
281 uint32_t mocs = (2 << 1); /* SKL_MOCS_WB */
282 #elif GEN_GEN == 8
283 uint32_t mocs = 0x78; /* BDW_MOCS_WB */
284 #elif GEN_GEN == 7
285 uint32_t mocs = 1; /* GEN7_MOCS_L3 */
286 #else
287 uint32_t mocs = 0;
288 #endif
289
290 uint32_t size;
291 blorp_emit_vertex_data(brw, params, &vb[0].BufferStartingAddress, &size);
292 vb[0].VertexBufferIndex = 0;
293 vb[0].BufferPitch = 2 * sizeof(float);
294 vb[0].VertexBufferMOCS = mocs;
295 #if GEN_GEN >= 7
296 vb[0].AddressModifyEnable = true;
297 #endif
298 #if GEN_GEN >= 8
299 vb[0].BufferSize = size;
300 #else
301 vb[0].BufferAccessType = VERTEXDATA;
302 vb[0].EndAddress = vb[0].BufferStartingAddress;
303 vb[0].EndAddress.offset += size - 1;
304 #endif
305
306 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
307 blorp_emit_input_varying_data(brw, params,
308 &vb[1].BufferStartingAddress, &size);
309 vb[1].VertexBufferIndex = 1;
310 vb[1].BufferPitch = 0;
311 vb[1].VertexBufferMOCS = mocs;
312 #if GEN_GEN >= 7
313 vb[1].AddressModifyEnable = true;
314 #endif
315 #if GEN_GEN >= 8
316 vb[1].BufferSize = size;
317 #else
318 vb[1].BufferAccessType = INSTANCEDATA;
319 vb[1].EndAddress = vb[1].BufferStartingAddress;
320 vb[1].EndAddress.offset += size - 1;
321 #endif
322 num_buffers++;
323 }
324
325 const unsigned num_dwords =
326 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
327 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
328
329 for (unsigned i = 0; i < num_buffers; i++) {
330 GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &vb[i]);
331 dw += GENX(VERTEX_BUFFER_STATE_length);
332 }
333 }
334
335 static void
336 blorp_emit_vertex_elements(struct brw_context *brw,
337 const struct brw_blorp_params *params)
338 {
339 const unsigned num_varyings =
340 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
341 const unsigned num_elements = 2 + num_varyings;
342
343 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
344 memset(ve, 0, num_elements * sizeof(*ve));
345
346 /* Setup VBO for the rectangle primitive..
347 *
348 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
349 * vertices. The vertices reside in screen space with DirectX
350 * coordinates (that is, (0, 0) is the upper left corner).
351 *
352 * v2 ------ implied
353 * | |
354 * | |
355 * v0 ----- v1
356 *
357 * Since the VS is disabled, the clipper loads each VUE directly from
358 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
359 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
360 * dw0: Reserved, MBZ.
361 * dw1: Render Target Array Index. The HiZ op does not use indexed
362 * vertices, so set the dword to 0.
363 * dw2: Viewport Index. The HiZ op disables viewport mapping and
364 * scissoring, so set the dword to 0.
365 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
366 * so set the dword to 0.
367 * dw4: Vertex Position X.
368 * dw5: Vertex Position Y.
369 * dw6: Vertex Position Z.
370 * dw7: Vertex Position W.
371 *
372 * dw8: Flat vertex input 0
373 * dw9: Flat vertex input 1
374 * ...
375 * dwn: Flat vertex input n - 8
376 *
377 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
378 * "Vertex URB Entry (VUE) Formats".
379 *
380 * Only vertex position X and Y are going to be variable, Z is fixed to
381 * zero and W to one. Header words dw0-3 are all zero. There is no need to
382 * include the fixed values in the vertex buffer. Vertex fetcher can be
383 * instructed to fill vertex elements with constant values of one and zero
384 * instead of reading them from the buffer.
385 * Flat inputs are program constants that are not interpolated. Moreover
386 * their values will be the same between vertices.
387 *
388 * See the vertex element setup below.
389 */
390 ve[0].VertexBufferIndex = 0;
391 ve[0].Valid = true;
392 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
393 ve[0].SourceElementOffset = 0;
394 ve[0].Component0Control = VFCOMP_STORE_0;
395 ve[0].Component1Control = VFCOMP_STORE_0;
396 ve[0].Component2Control = VFCOMP_STORE_0;
397 ve[0].Component3Control = VFCOMP_STORE_0;
398
399 ve[1].VertexBufferIndex = 0;
400 ve[1].Valid = true;
401 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
402 ve[1].SourceElementOffset = 0;
403 ve[1].Component0Control = VFCOMP_STORE_SRC;
404 ve[1].Component1Control = VFCOMP_STORE_SRC;
405 ve[1].Component2Control = VFCOMP_STORE_0;
406 ve[1].Component3Control = VFCOMP_STORE_1_FP;
407
408 for (unsigned i = 0; i < num_varyings; ++i) {
409 ve[i + 2].VertexBufferIndex = 1;
410 ve[i + 2].Valid = true;
411 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
412 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
413 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
414 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
415 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
416 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
417 }
418
419 const unsigned num_dwords =
420 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
421 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
422
423 for (unsigned i = 0; i < num_elements; i++) {
424 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]);
425 dw += GENX(VERTEX_ELEMENT_STATE_length);
426 }
427
428 #if GEN_GEN >= 8
429 blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs);
430
431 for (unsigned i = 0; i < num_elements; i++) {
432 blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) {
433 vf.VertexElementIndex = i;
434 vf.InstancingEnable = false;
435 }
436 }
437
438 blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) {
439 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
440 }
441 #endif
442 }
443
444 static void
445 blorp_emit_sf_config(struct brw_context *brw,
446 const struct brw_blorp_params *params)
447 {
448 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
449
450 /* 3DSTATE_SF
451 *
452 * Disable ViewportTransformEnable (dw2.1)
453 *
454 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
455 * Primitives Overview":
456 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
457 * use of screen- space coordinates).
458 *
459 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
460 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
461 *
462 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
463 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
464 * SOLID: Any triangle or rectangle object found to be front-facing
465 * is rendered as a solid object. This setting is required when
466 * (rendering rectangle (RECTLIST) objects.
467 */
468
469 #if GEN_GEN >= 8
470
471 blorp_emit(brw, GENX(3DSTATE_SF), sf);
472
473 blorp_emit(brw, GENX(3DSTATE_RASTER), raster) {
474 raster.CullMode = CULLMODE_NONE;
475 }
476
477 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
478 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
479 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
480 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
481 sbe.ForceVertexURBEntryReadLength = true;
482 sbe.ForceVertexURBEntryReadOffset = true;
483 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
484
485 #if GEN_GEN >= 9
486 for (unsigned i = 0; i < 32; i++)
487 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
488 #endif
489 }
490
491 #elif GEN_GEN >= 7
492
493 blorp_emit(brw, GENX(3DSTATE_SF), sf) {
494 sf.FrontFaceFillMode = FILL_MODE_SOLID;
495 sf.BackFaceFillMode = FILL_MODE_SOLID;
496
497 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
498 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
499
500 #if GEN_GEN == 7
501 sf.DepthBufferSurfaceFormat = params->depth_format;
502 #endif
503 }
504
505 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
506 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
507 if (prog_data) {
508 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
509 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
510 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
511 } else {
512 sbe.NumberofSFOutputAttributes = 0;
513 sbe.VertexURBEntryReadLength = 1;
514 }
515 }
516
517 #else /* GEN_GEN <= 6 */
518
519 blorp_emit(brw, GENX(3DSTATE_SF), sf) {
520 sf.FrontFaceFillMode = FILL_MODE_SOLID;
521 sf.BackFaceFillMode = FILL_MODE_SOLID;
522
523 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
524 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
525
526 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
527 if (prog_data) {
528 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
529 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
530 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
531 } else {
532 sf.NumberofSFOutputAttributes = 0;
533 sf.VertexURBEntryReadLength = 1;
534 }
535 }
536
537 #endif /* GEN_GEN */
538 }
539
540 static void
541 blorp_emit_ps_config(struct brw_context *brw,
542 const struct brw_blorp_params *params)
543 {
544 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
545
546 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
547 * nonzero to prevent the GPU from hanging. While the documentation doesn't
548 * mention this explicitly, it notes that the valid range for the field is
549 * [1,39] = [2,40] threads, which excludes zero.
550 *
551 * To be safe (and to minimize extraneous code) we go ahead and fully
552 * configure the WM state whether or not there is a WM program.
553 */
554
555 #if GEN_GEN >= 8
556
557 blorp_emit(brw, GENX(3DSTATE_WM), wm);
558
559 blorp_emit(brw, GENX(3DSTATE_PS), ps) {
560 if (params->src.bo) {
561 ps.SamplerCount = 1; /* Up to 4 samplers */
562 ps.BindingTableEntryCount = 2;
563 } else {
564 ps.BindingTableEntryCount = 1;
565 }
566
567 ps.DispatchGRFStartRegisterForConstantSetupData0 =
568 prog_data->first_curbe_grf_0;
569 ps.DispatchGRFStartRegisterForConstantSetupData2 =
570 prog_data->first_curbe_grf_2;
571
572 ps._8PixelDispatchEnable = prog_data->dispatch_8;
573 ps._16PixelDispatchEnable = prog_data->dispatch_16;
574
575 ps.KernelStartPointer0 = params->wm_prog_kernel;
576 ps.KernelStartPointer2 =
577 params->wm_prog_kernel + prog_data->ksp_offset_2;
578
579 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
580 * it implicitly scales for different GT levels (which have some # of
581 * PSDs).
582 *
583 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
584 */
585 if (GEN_GEN >= 9)
586 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
587 else
588 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
589
590 switch (params->fast_clear_op) {
591 #if GEN_GEN >= 9
592 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
593 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
594 break;
595 case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
596 ps.RenderTargetResolveType = RESOLVE_FULL;
597 break;
598 #else
599 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
600 ps.RenderTargetResolveEnable = true;
601 break;
602 #endif
603 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
604 ps.RenderTargetFastClearEnable = true;
605 break;
606 }
607 }
608
609 blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
610 psx.PixelShaderValid = true;
611
612 if (params->src.bo)
613 psx.PixelShaderKillsPixel = true;
614
615 psx.AttributeEnable = prog_data->num_varying_inputs > 0;
616
617 if (prog_data && prog_data->persample_msaa_dispatch)
618 psx.PixelShaderIsPerSample = true;
619 }
620
621 #elif GEN_GEN >= 7
622
623 blorp_emit(brw, GENX(3DSTATE_WM), wm) {
624 switch (params->hiz_op) {
625 case GEN6_HIZ_OP_DEPTH_CLEAR:
626 wm.DepthBufferClear = true;
627 break;
628 case GEN6_HIZ_OP_DEPTH_RESOLVE:
629 wm.DepthBufferResolveEnable = true;
630 break;
631 case GEN6_HIZ_OP_HIZ_RESOLVE:
632 wm.HierarchicalDepthBufferResolveEnable = true;
633 break;
634 case GEN6_HIZ_OP_NONE:
635 break;
636 default:
637 unreachable("not reached");
638 }
639
640 if (prog_data)
641 wm.ThreadDispatchEnable = true;
642
643 if (params->src.bo)
644 wm.PixelShaderKillPixel = true;
645
646 if (params->dst.surf.samples > 1) {
647 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
648 wm.MultisampleDispatchMode =
649 (prog_data && prog_data->persample_msaa_dispatch) ?
650 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
651 } else {
652 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
653 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
654 }
655 }
656
657 blorp_emit(brw, GENX(3DSTATE_PS), ps) {
658 ps.MaximumNumberofThreads = brw->max_wm_threads - 1;
659
660 #if GEN_IS_HASWELL
661 ps.SampleMask = 1;
662 #endif
663
664 if (prog_data) {
665 ps.DispatchGRFStartRegisterforConstantSetupData0 =
666 prog_data->first_curbe_grf_0;
667 ps.DispatchGRFStartRegisterforConstantSetupData2 =
668 prog_data->first_curbe_grf_2;
669
670 ps.KernelStartPointer0 = params->wm_prog_kernel;
671 ps.KernelStartPointer2 =
672 params->wm_prog_kernel + prog_data->ksp_offset_2;
673
674 ps._8PixelDispatchEnable = prog_data->dispatch_8;
675 ps._16PixelDispatchEnable = prog_data->dispatch_16;
676
677 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
678 } else {
679 /* Gen7 hardware gets angry if we don't enable at least one dispatch
680 * mode, so just enable 16-pixel dispatch if we don't have a program.
681 */
682 ps._16PixelDispatchEnable = true;
683 }
684
685 if (params->src.bo)
686 ps.SamplerCount = 1; /* Up to 4 samplers */
687
688 switch (params->fast_clear_op) {
689 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
690 ps.RenderTargetResolveEnable = true;
691 break;
692 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
693 ps.RenderTargetFastClearEnable = true;
694 break;
695 }
696 }
697
698 #else /* GEN_GEN <= 6 */
699
700 blorp_emit(brw, GENX(3DSTATE_WM), wm) {
701 wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
702
703 switch (params->hiz_op) {
704 case GEN6_HIZ_OP_DEPTH_CLEAR:
705 wm.DepthBufferClear = true;
706 break;
707 case GEN6_HIZ_OP_DEPTH_RESOLVE:
708 wm.DepthBufferResolveEnable = true;
709 break;
710 case GEN6_HIZ_OP_HIZ_RESOLVE:
711 wm.HierarchicalDepthBufferResolveEnable = true;
712 break;
713 case GEN6_HIZ_OP_NONE:
714 break;
715 default:
716 unreachable("not reached");
717 }
718
719 if (prog_data) {
720 wm.ThreadDispatchEnable = true;
721
722 wm.DispatchGRFStartRegisterforConstantSetupData0 =
723 prog_data->first_curbe_grf_0;
724 wm.DispatchGRFStartRegisterforConstantSetupData2 =
725 prog_data->first_curbe_grf_2;
726
727 wm.KernelStartPointer0 = params->wm_prog_kernel;
728 wm.KernelStartPointer2 =
729 params->wm_prog_kernel + prog_data->ksp_offset_2;
730
731 wm._8PixelDispatchEnable = prog_data->dispatch_8;
732 wm._16PixelDispatchEnable = prog_data->dispatch_16;
733
734 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
735 }
736
737 if (params->src.bo) {
738 wm.SamplerCount = 1; /* Up to 4 samplers */
739 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
740 }
741
742 if (params->dst.surf.samples > 1) {
743 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
744 wm.MultisampleDispatchMode =
745 (prog_data && prog_data->persample_msaa_dispatch) ?
746 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
747 } else {
748 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
749 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
750 }
751 }
752
753 #endif /* GEN_GEN */
754 }
755
756
757 static void
758 blorp_emit_depth_stencil_config(struct brw_context *brw,
759 const struct brw_blorp_params *params)
760 {
761 brw_emit_depth_stall_flushes(brw);
762
763 #if GEN_GEN >= 7
764 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
765 #else
766 const uint32_t mocs = 0;
767 #endif
768
769 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
770 switch (params->depth.surf.dim) {
771 case ISL_SURF_DIM_1D:
772 db.SurfaceType = SURFTYPE_1D;
773 break;
774 case ISL_SURF_DIM_2D:
775 db.SurfaceType = SURFTYPE_2D;
776 break;
777 case ISL_SURF_DIM_3D:
778 db.SurfaceType = SURFTYPE_3D;
779 break;
780 }
781
782 db.SurfaceFormat = params->depth_format;
783
784 #if GEN_GEN >= 7
785 db.DepthWriteEnable = true;
786 #endif
787
788 #if GEN_GEN <= 6
789 db.TiledSurface = true;
790 db.TileWalk = TILEWALK_YMAJOR;
791 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
792 db.SeparateStencilBufferEnable = true;
793 #endif
794
795 db.HierarchicalDepthBufferEnable = true;
796
797 db.Width = params->depth.surf.logical_level0_px.width - 1;
798 db.Height = params->depth.surf.logical_level0_px.height - 1;
799 db.RenderTargetViewExtent = db.Depth =
800 MAX2(params->depth.surf.logical_level0_px.depth,
801 params->depth.surf.logical_level0_px.array_len) - 1;
802
803 db.LOD = params->depth.view.base_level;
804 db.MinimumArrayElement = params->depth.view.base_array_layer;
805
806 db.SurfacePitch = params->depth.surf.row_pitch - 1;
807 db.SurfaceBaseAddress = (struct blorp_address) {
808 .buffer = params->depth.bo,
809 .read_domains = I915_GEM_DOMAIN_RENDER,
810 .write_domain = I915_GEM_DOMAIN_RENDER,
811 .offset = params->depth.offset,
812 };
813 db.DepthBufferMOCS = mocs;
814 }
815
816 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
817 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
818 hiz.SurfaceBaseAddress = (struct blorp_address) {
819 .buffer = params->depth.aux_bo,
820 .read_domains = I915_GEM_DOMAIN_RENDER,
821 .write_domain = I915_GEM_DOMAIN_RENDER,
822 .offset = params->depth.aux_offset,
823 };
824 hiz.HierarchicalDepthBufferMOCS = mocs;
825 }
826
827 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
828 }
829
830 static uint32_t
831 blorp_emit_blend_state(struct brw_context *brw,
832 const struct brw_blorp_params *params)
833 {
834 struct GENX(BLEND_STATE) blend;
835 memset(&blend, 0, sizeof(blend));
836
837 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
838 blend.Entry[i].PreBlendColorClampEnable = true;
839 blend.Entry[i].PostBlendColorClampEnable = true;
840 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
841
842 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
843 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
844 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
845 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
846 }
847
848 uint32_t offset;
849 void *state = blorp_alloc_dynamic_state(&brw->blorp,
850 AUB_TRACE_BLEND_STATE,
851 GENX(BLEND_STATE_length) * 4,
852 64, &offset);
853 GENX(BLEND_STATE_pack)(NULL, state, &blend);
854
855 #if GEN_GEN >= 7
856 blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
857 sp.BlendStatePointer = offset;
858 #if GEN_GEN >= 8
859 sp.BlendStatePointerValid = true;
860 #endif
861 }
862 #endif
863
864 #if GEN_GEN >= 8
865 blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) {
866 ps_blend.HasWriteableRT = true;
867 }
868 #endif
869
870 return offset;
871 }
872
873 static uint32_t
874 blorp_emit_color_calc_state(struct brw_context *brw,
875 const struct brw_blorp_params *params)
876 {
877 uint32_t offset;
878 void *state = blorp_alloc_dynamic_state(&brw->blorp,
879 AUB_TRACE_CC_STATE,
880 GENX(COLOR_CALC_STATE_length) * 4,
881 64, &offset);
882 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
883
884 #if GEN_GEN >= 7
885 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
886 sp.ColorCalcStatePointer = offset;
887 #if GEN_GEN >= 8
888 sp.ColorCalcStatePointerValid = true;
889 #endif
890 }
891 #endif
892
893 return offset;
894 }
895
896 static uint32_t
897 blorp_emit_depth_stencil_state(struct brw_context *brw,
898 const struct brw_blorp_params *params)
899 {
900 #if GEN_GEN >= 8
901
902 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
903 blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
904 return 0;
905
906 #else /* GEN_GEN <= 7 */
907
908 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
909 * - 7.5.3.1 Depth Buffer Clear
910 * - 7.5.3.2 Depth Buffer Resolve
911 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
912 */
913 struct GENX(DEPTH_STENCIL_STATE) ds = {
914 .DepthBufferWriteEnable = true,
915 };
916
917 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
918 ds.DepthTestEnable = true;
919 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
920 }
921
922 uint32_t offset;
923 void *state = blorp_alloc_dynamic_state(&brw->blorp,
924 AUB_TRACE_DEPTH_STENCIL_STATE,
925 GENX(DEPTH_STENCIL_STATE_length) * 4,
926 64, &offset);
927 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
928
929 #if GEN_GEN >= 7
930 blorp_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
931 sp.PointertoDEPTH_STENCIL_STATE = offset;
932 }
933 #endif
934
935 return offset;
936
937 #endif /* GEN_GEN */
938 }
939
940 static void
941 blorp_emit_surface_states(struct brw_context *brw,
942 const struct brw_blorp_params *params)
943 {
944 uint32_t bind_offset;
945 uint32_t *bind =
946 brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
947 sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
948 32, /* alignment */ &bind_offset);
949
950 bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
951 brw_blorp_emit_surface_state(brw, &params->dst,
952 I915_GEM_DOMAIN_RENDER,
953 I915_GEM_DOMAIN_RENDER, true);
954 if (params->src.bo) {
955 bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] =
956 brw_blorp_emit_surface_state(brw, &params->src,
957 I915_GEM_DOMAIN_SAMPLER, 0, false);
958 }
959
960 #if GEN_GEN >= 7
961 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
962 bt.PointertoPSBindingTable = bind_offset;
963 }
964 #else
965 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
966 bt.PSBindingTableChange = true;
967 bt.PointertoPSBindingTable = bind_offset;
968 }
969 #endif
970 }
971
972 static void
973 blorp_emit_sampler_state(struct brw_context *brw,
974 const struct brw_blorp_params *params)
975 {
976 struct GENX(SAMPLER_STATE) sampler = {
977 .MipModeFilter = MIPFILTER_NONE,
978 .MagModeFilter = MAPFILTER_LINEAR,
979 .MinModeFilter = MAPFILTER_LINEAR,
980 .MinLOD = 0,
981 .MaxLOD = 0,
982 .TCXAddressControlMode = TCM_CLAMP,
983 .TCYAddressControlMode = TCM_CLAMP,
984 .TCZAddressControlMode = TCM_CLAMP,
985 .MaximumAnisotropy = RATIO21,
986 .RAddressMinFilterRoundingEnable = true,
987 .RAddressMagFilterRoundingEnable = true,
988 .VAddressMinFilterRoundingEnable = true,
989 .VAddressMagFilterRoundingEnable = true,
990 .UAddressMinFilterRoundingEnable = true,
991 .UAddressMagFilterRoundingEnable = true,
992 .NonnormalizedCoordinateEnable = true,
993 };
994
995 uint32_t offset;
996 void *state = blorp_alloc_dynamic_state(&brw->blorp,
997 AUB_TRACE_SAMPLER_STATE,
998 GENX(SAMPLER_STATE_length) * 4,
999 32, &offset);
1000 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
1001
1002 #if GEN_GEN >= 7
1003 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1004 ssp.PointertoPSSamplerState = offset;
1005 }
1006 #else
1007 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1008 ssp.VSSamplerStateChange = true;
1009 ssp.GSSamplerStateChange = true;
1010 ssp.PSSamplerStateChange = true;
1011 ssp.PointertoPSSamplerState = offset;
1012 }
1013 #endif
1014 }
1015
1016 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1017 static void
1018 blorp_emit_viewport_state(struct brw_context *brw,
1019 const struct brw_blorp_params *params)
1020 {
1021 uint32_t cc_vp_offset;
1022
1023 void *state = blorp_alloc_dynamic_state(&brw->blorp,
1024 AUB_TRACE_CC_VP_STATE,
1025 GENX(CC_VIEWPORT_length) * 4, 32,
1026 &cc_vp_offset);
1027
1028 GENX(CC_VIEWPORT_pack)(brw, state,
1029 &(struct GENX(CC_VIEWPORT)) {
1030 .MinimumDepth = 0.0,
1031 .MaximumDepth = 1.0,
1032 });
1033
1034 #if GEN_GEN >= 7
1035 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1036 vsp.CCViewportPointer = cc_vp_offset;
1037 }
1038 #else
1039 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1040 vsp.CCViewportStateChange = true;
1041 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1042 }
1043 #endif
1044 }
1045
1046
1047 /**
1048 * \brief Execute a blit or render pass operation.
1049 *
1050 * To execute the operation, this function manually constructs and emits a
1051 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1052 * constructing and after emitting the batch.
1053 *
1054 * This function alters no GL state.
1055 */
1056 void
1057 genX(blorp_exec)(struct brw_context *brw,
1058 const struct brw_blorp_params *params)
1059 {
1060 uint32_t blend_state_offset = 0;
1061 uint32_t color_calc_state_offset = 0;
1062 uint32_t depth_stencil_state_offset;
1063
1064 #if GEN_GEN == 6
1065 /* Emit workaround flushes when we switch from drawing to blorping. */
1066 brw_emit_post_sync_nonzero_flush(brw);
1067 #endif
1068
1069 brw_upload_state_base_address(brw);
1070
1071 #if GEN_GEN >= 8
1072 gen7_l3_state.emit(brw);
1073 #endif
1074
1075 blorp_emit_vertex_buffers(brw, params);
1076 blorp_emit_vertex_elements(brw, params);
1077
1078 emit_urb_config(brw, params);
1079
1080 if (params->wm_prog_data) {
1081 blend_state_offset = blorp_emit_blend_state(brw, params);
1082 color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
1083 }
1084 depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
1085
1086 #if GEN_GEN <= 6
1087 /* 3DSTATE_CC_STATE_POINTERS
1088 *
1089 * The pointer offsets are relative to
1090 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1091 *
1092 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1093 *
1094 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1095 * gen7+. However, on gen6 and earlier, they're all lumpped together in
1096 * one CC_STATE_POINTERS packet so we have to emit that here.
1097 */
1098 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1099 cc.BLEND_STATEChange = true;
1100 cc.COLOR_CALC_STATEChange = true;
1101 cc.DEPTH_STENCIL_STATEChange = true;
1102 cc.PointertoBLEND_STATE = blend_state_offset;
1103 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1104 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1105 }
1106 #else
1107 (void)blend_state_offset;
1108 (void)color_calc_state_offset;
1109 (void)depth_stencil_state_offset;
1110 #endif
1111
1112 blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
1113 #if GEN_GEN >= 7
1114 blorp_emit(brw, GENX(3DSTATE_CONSTANT_HS), hs);
1115 blorp_emit(brw, GENX(3DSTATE_CONSTANT_DS), DS);
1116 #endif
1117 blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
1118 blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
1119
1120 if (brw->use_resource_streamer)
1121 gen7_disable_hw_binding_tables(brw);
1122
1123 if (params->wm_prog_data)
1124 blorp_emit_surface_states(brw, params);
1125
1126 if (params->src.bo)
1127 blorp_emit_sampler_state(brw, params);
1128
1129 blorp_emit_3dstate_multisample(brw, params->dst.surf.samples);
1130
1131 blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
1132 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1133 }
1134
1135 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1136 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1137 *
1138 * [DevSNB] A pipeline flush must be programmed prior to a
1139 * 3DSTATE_VS command that causes the VS Function Enable to
1140 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1141 * command with CS stall bit set and a post sync operation.
1142 *
1143 * We've already done one at the start of the BLORP operation.
1144 */
1145 blorp_emit(brw, GENX(3DSTATE_VS), vs);
1146 #if GEN_GEN >= 7
1147 blorp_emit(brw, GENX(3DSTATE_HS), hs);
1148 blorp_emit(brw, GENX(3DSTATE_TE), te);
1149 blorp_emit(brw, GENX(3DSTATE_DS), DS);
1150 blorp_emit(brw, GENX(3DSTATE_STREAMOUT), so);
1151 #endif
1152 blorp_emit(brw, GENX(3DSTATE_GS), gs);
1153
1154 blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
1155 clip.PerspectiveDivideDisable = true;
1156 }
1157
1158 blorp_emit_sf_config(brw, params);
1159 blorp_emit_ps_config(brw, params);
1160
1161 blorp_emit_viewport_state(brw, params);
1162
1163 if (params->depth.bo) {
1164 blorp_emit_depth_stencil_config(brw, params);
1165 } else {
1166 brw_emit_depth_stall_flushes(brw);
1167
1168 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
1169 db.SurfaceType = SURFTYPE_NULL;
1170 db.SurfaceFormat = D32_FLOAT;
1171 }
1172 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1173 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
1174 }
1175
1176 /* 3DSTATE_CLEAR_PARAMS
1177 *
1178 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1179 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1180 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1181 */
1182 blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1183 clear.DepthClearValueValid = true;
1184 clear.DepthClearValue = params->depth.clear_color.u32[0];
1185 }
1186
1187 blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1188 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1189 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1190 }
1191
1192 blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
1193 prim.VertexAccessType = SEQUENTIAL;
1194 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1195 prim.VertexCountPerInstance = 3;
1196 prim.InstanceCount = params->num_layers;
1197 }
1198 }