ea4c07681bcb6fd0ea7f1726259548b99b825e3e
[mesa.git] / src / mesa / drivers / dri / i965 / genX_blorp_exec.c
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31
32 #include "blorp_priv.h"
33
34 #include "genxml/gen_macros.h"
35
36 static void *
37 blorp_emit_dwords(struct brw_context *brw, unsigned n)
38 {
39 intel_batchbuffer_begin(brw, n, RENDER_RING);
40 uint32_t *map = brw->batch.map_next;
41 brw->batch.map_next += n;
42 intel_batchbuffer_advance(brw);
43 return map;
44 }
45
46 struct blorp_address {
47 drm_intel_bo *buffer;
48 uint32_t read_domains;
49 uint32_t write_domain;
50 uint32_t offset;
51 };
52
53 static uint64_t
54 blorp_emit_reloc(struct brw_context *brw, void *location,
55 struct blorp_address address, uint32_t delta)
56 {
57 uint32_t offset = (char *)location - (char *)brw->batch.map;
58 if (brw->gen >= 8) {
59 return intel_batchbuffer_reloc64(brw, address.buffer, offset,
60 address.read_domains,
61 address.write_domain,
62 address.offset + delta);
63 } else {
64 return intel_batchbuffer_reloc(brw, address.buffer, offset,
65 address.read_domains,
66 address.write_domain,
67 address.offset + delta);
68 }
69 }
70
71 static void *
72 blorp_alloc_dynamic_state(struct blorp_context *blorp,
73 enum aub_state_struct_type type,
74 uint32_t size,
75 uint32_t alignment,
76 uint32_t *offset)
77 {
78 struct brw_context *brw = blorp->driver_ctx;
79 return brw_state_batch(brw, type, size, alignment, offset);
80 }
81
82 static void *
83 blorp_alloc_vertex_buffer(struct blorp_context *blorp, uint32_t size,
84 struct blorp_address *addr)
85 {
86 struct brw_context *brw = blorp->driver_ctx;
87
88 uint32_t offset;
89 void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
90 size, 32, &offset);
91
92 *addr = (struct blorp_address) {
93 .buffer = brw->batch.bo,
94 .read_domains = I915_GEM_DOMAIN_VERTEX,
95 .write_domain = 0,
96 .offset = offset,
97 };
98
99 return data;
100 }
101
/**
 * Emit 3DSTATE_MULTISAMPLE for \p samples using the emitter that matches the
 * generation this file is being compiled for.
 */
static void
blorp_emit_3dstate_multisample(struct brw_context *brw, unsigned samples)
{
#if GEN_GEN < 8
   gen6_emit_3dstate_multisample(brw, samples);
#else
   gen8_emit_3dstate_multisample(brw, samples);
#endif
}
111
112 #define __gen_address_type struct blorp_address
113 #define __gen_user_data struct brw_context
114
115 static uint64_t
116 __gen_combine_address(struct brw_context *brw, void *location,
117 struct blorp_address address, uint32_t delta)
118 {
119 if (address.buffer == NULL) {
120 return address.offset + delta;
121 } else {
122 return blorp_emit_reloc(brw, location, address, delta);
123 }
124 }
125
126 #include "genxml/genX_pack.h"
127
/* Helpers mapping a genxml command name to its generated symbols.  The
 * indirection lets a GENX(...) argument expand before the token paste.
 */
#define _blorp_cmd_length(cmd) cmd ## _length
#define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
#define _blorp_cmd_header(cmd) cmd ## _header
#define _blorp_cmd_pack(cmd) cmd ## _pack

/* Emit a fixed-length command.
 *
 * Declares `struct cmd name` initialised with the command header, reserves
 * the command's dwords in the batch, runs the statement/block that follows
 * the macro exactly once so the caller can fill in `name`, and then packs
 * `name` into the reserved dwords.
 */
#define blorp_emit(brw, cmd, name)                                \
   for (struct cmd name = { _blorp_cmd_header(cmd) },             \
        *_dst = blorp_emit_dwords(brw, _blorp_cmd_length(cmd));   \
        __builtin_expect(_dst != NULL, 1);                        \
        _blorp_cmd_pack(cmd)(brw, (void *)_dst, &name),           \
        _dst = NULL)

/* Emit a variable-length command of `n` dwords in total.
 *
 * Packs only the header (with DWordLength derived from `n`) and evaluates to
 * a pointer to dword 1, where the caller writes the payload.  `n` is
 * evaluated exactly once, so any expression is safe to pass.
 */
#define blorp_emitn(batch, cmd, n) ({                             \
      const unsigned _n = (n);                                    \
      uint32_t *_dw = blorp_emit_dwords(batch, _n);               \
      struct cmd template = {                                     \
         _blorp_cmd_header(cmd),                                  \
         .DWordLength = _n - _blorp_cmd_length_bias(cmd),         \
      };                                                          \
      _blorp_cmd_pack(cmd)(batch, _dw, &template);                \
      _dw + 1; /* Array starts at dw[1] */                        \
   })
149
150 /* Once vertex fetcher has written full VUE entries with complete
151 * header the space requirement is as follows per vertex (in bytes):
152 *
153 * Header Position Program constants
154 * +--------+------------+-------------------+
155 * | 16 | 16 | n x 16 |
156 * +--------+------------+-------------------+
157 *
158 * where 'n' stands for number of varying inputs expressed as vec4s.
159 *
160 * The URB size is in turn expressed in 64 bytes (512 bits).
161 */
162 static inline unsigned
163 gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
164 {
165 const unsigned num_varyings =
166 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
167 const unsigned total_needed = 16 + 16 + num_varyings * 16;
168
169 return DIV_ROUND_UP(total_needed, 64);
170 }
171
172 /* 3DSTATE_URB
173 * 3DSTATE_URB_VS
174 * 3DSTATE_URB_HS
175 * 3DSTATE_URB_DS
176 * 3DSTATE_URB_GS
177 *
178 * Assign the entire URB to the VS. Even though the VS disabled, URB space
179 * is still needed because the clipper loads the VUE's from the URB. From
180 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
181 * Dword 1.15:0 "VS Number of URB Entries":
182 * This field is always used (even if VS Function Enable is DISABLED).
183 *
184 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
185 * safely ignore it because this batch contains only one draw call.
186 * Because of URB corruption caused by allocating a previous GS unit
187 * URB entry to the VS unit, software is required to send a “GS NULL
188 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
189 * plus a dummy DRAW call before any case where VS will be taking over
190 * GS URB space.
191 *
192 * If the 3DSTATE_URB_VS is emitted, than the others must be also.
193 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
194 *
195 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
196 * programmed in order for the programming of this state to be
197 * valid.
198 */
199 static void
200 emit_urb_config(struct brw_context *brw,
201 const struct brw_blorp_params *params)
202 {
203 #if GEN_GEN >= 7
204 const unsigned vs_entry_size = gen7_blorp_get_vs_entry_size(params);
205
206 if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
207 brw->urb.vsize >= vs_entry_size)
208 return;
209
210 brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
211
212 gen7_upload_urb(brw, vs_entry_size, false, false);
213 #else
214 blorp_emit(brw, GENX(3DSTATE_URB), urb) {
215 urb.VSNumberofURBEntries = brw->urb.max_vs_entries;
216 }
217 #endif
218 }
219
220 static void
221 blorp_emit_vertex_data(struct brw_context *brw,
222 const struct brw_blorp_params *params,
223 struct blorp_address *addr,
224 uint32_t *size)
225 {
226 const float vertices[] = {
227 /* v0 */ (float)params->x0, (float)params->y1,
228 /* v1 */ (float)params->x1, (float)params->y1,
229 /* v2 */ (float)params->x0, (float)params->y0,
230 };
231
232 void *data = blorp_alloc_vertex_buffer(&brw->blorp, sizeof(vertices), addr);
233 memcpy(data, vertices, sizeof(vertices));
234 *size = sizeof(vertices);
235 }
236
237 static void
238 blorp_emit_input_varying_data(struct brw_context *brw,
239 const struct brw_blorp_params *params,
240 struct blorp_address *addr,
241 uint32_t *size)
242 {
243 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
244 const unsigned max_num_varyings =
245 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
246 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
247
248 *size = num_varyings * vec4_size_in_bytes;
249
250 const float *const inputs_src = (const float *)&params->wm_inputs;
251 float *inputs = blorp_alloc_vertex_buffer(&brw->blorp, *size, addr);
252
253 /* Walk over the attribute slots, determine if the attribute is used by
254 * the program and when necessary copy the values from the input storage to
255 * the vertex data buffer.
256 */
257 for (unsigned i = 0; i < max_num_varyings; i++) {
258 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
259
260 if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
261 continue;
262
263 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
264
265 inputs += 4;
266 }
267 }
268
269 static void
270 blorp_emit_vertex_buffers(struct brw_context *brw,
271 const struct brw_blorp_params *params)
272 {
273 struct GENX(VERTEX_BUFFER_STATE) vb[2];
274 memset(vb, 0, sizeof(vb));
275
276 unsigned num_buffers = 1;
277
278 #if GEN_GEN == 9
279 uint32_t mocs = (2 << 1); /* SKL_MOCS_WB */
280 #elif GEN_GEN == 8
281 uint32_t mocs = 0x78; /* BDW_MOCS_WB */
282 #elif GEN_GEN == 7
283 uint32_t mocs = 1; /* GEN7_MOCS_L3 */
284 #else
285 uint32_t mocs = 0;
286 #endif
287
288 uint32_t size;
289 blorp_emit_vertex_data(brw, params, &vb[0].BufferStartingAddress, &size);
290 vb[0].VertexBufferIndex = 0;
291 vb[0].BufferPitch = 2 * sizeof(float);
292 vb[0].VertexBufferMOCS = mocs;
293 #if GEN_GEN >= 7
294 vb[0].AddressModifyEnable = true;
295 #endif
296 #if GEN_GEN >= 8
297 vb[0].BufferSize = size;
298 #else
299 vb[0].BufferAccessType = VERTEXDATA;
300 vb[0].EndAddress = vb[0].BufferStartingAddress;
301 vb[0].EndAddress.offset += size - 1;
302 #endif
303
304 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
305 blorp_emit_input_varying_data(brw, params,
306 &vb[1].BufferStartingAddress, &size);
307 vb[1].VertexBufferIndex = 1;
308 vb[1].BufferPitch = 0;
309 vb[1].VertexBufferMOCS = mocs;
310 #if GEN_GEN >= 7
311 vb[1].AddressModifyEnable = true;
312 #endif
313 #if GEN_GEN >= 8
314 vb[1].BufferSize = size;
315 #else
316 vb[1].BufferAccessType = INSTANCEDATA;
317 vb[1].EndAddress = vb[1].BufferStartingAddress;
318 vb[1].EndAddress.offset += size - 1;
319 #endif
320 num_buffers++;
321 }
322
323 const unsigned num_dwords =
324 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
325 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
326
327 for (unsigned i = 0; i < num_buffers; i++) {
328 GENX(VERTEX_BUFFER_STATE_pack)(brw, dw, &vb[i]);
329 dw += GENX(VERTEX_BUFFER_STATE_length);
330 }
331 }
332
333 static void
334 blorp_emit_vertex_elements(struct brw_context *brw,
335 const struct brw_blorp_params *params)
336 {
337 const unsigned num_varyings =
338 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
339 const unsigned num_elements = 2 + num_varyings;
340
341 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
342 memset(ve, 0, num_elements * sizeof(*ve));
343
344 /* Setup VBO for the rectangle primitive..
345 *
346 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
347 * vertices. The vertices reside in screen space with DirectX
348 * coordinates (that is, (0, 0) is the upper left corner).
349 *
350 * v2 ------ implied
351 * | |
352 * | |
353 * v0 ----- v1
354 *
355 * Since the VS is disabled, the clipper loads each VUE directly from
356 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
357 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
358 * dw0: Reserved, MBZ.
359 * dw1: Render Target Array Index. The HiZ op does not use indexed
360 * vertices, so set the dword to 0.
361 * dw2: Viewport Index. The HiZ op disables viewport mapping and
362 * scissoring, so set the dword to 0.
363 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
364 * so set the dword to 0.
365 * dw4: Vertex Position X.
366 * dw5: Vertex Position Y.
367 * dw6: Vertex Position Z.
368 * dw7: Vertex Position W.
369 *
370 * dw8: Flat vertex input 0
371 * dw9: Flat vertex input 1
372 * ...
373 * dwn: Flat vertex input n - 8
374 *
375 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
376 * "Vertex URB Entry (VUE) Formats".
377 *
378 * Only vertex position X and Y are going to be variable, Z is fixed to
379 * zero and W to one. Header words dw0-3 are all zero. There is no need to
380 * include the fixed values in the vertex buffer. Vertex fetcher can be
381 * instructed to fill vertex elements with constant values of one and zero
382 * instead of reading them from the buffer.
383 * Flat inputs are program constants that are not interpolated. Moreover
384 * their values will be the same between vertices.
385 *
386 * See the vertex element setup below.
387 */
388 ve[0].VertexBufferIndex = 0;
389 ve[0].Valid = true;
390 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
391 ve[0].SourceElementOffset = 0;
392 ve[0].Component0Control = VFCOMP_STORE_0;
393 ve[0].Component1Control = VFCOMP_STORE_0;
394 ve[0].Component2Control = VFCOMP_STORE_0;
395 ve[0].Component3Control = VFCOMP_STORE_0;
396
397 ve[1].VertexBufferIndex = 0;
398 ve[1].Valid = true;
399 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
400 ve[1].SourceElementOffset = 0;
401 ve[1].Component0Control = VFCOMP_STORE_SRC;
402 ve[1].Component1Control = VFCOMP_STORE_SRC;
403 ve[1].Component2Control = VFCOMP_STORE_0;
404 ve[1].Component3Control = VFCOMP_STORE_1_FP;
405
406 for (unsigned i = 0; i < num_varyings; ++i) {
407 ve[i + 2].VertexBufferIndex = 1;
408 ve[i + 2].Valid = true;
409 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
410 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
411 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
412 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
413 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
414 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
415 }
416
417 const unsigned num_dwords =
418 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
419 uint32_t *dw = blorp_emitn(brw, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
420
421 for (unsigned i = 0; i < num_elements; i++) {
422 GENX(VERTEX_ELEMENT_STATE_pack)(brw, dw, &ve[i]);
423 dw += GENX(VERTEX_ELEMENT_STATE_length);
424 }
425
426 #if GEN_GEN >= 8
427 blorp_emit(brw, GENX(3DSTATE_VF_SGVS), sgvs);
428
429 for (unsigned i = 0; i < num_elements; i++) {
430 blorp_emit(brw, GENX(3DSTATE_VF_INSTANCING), vf) {
431 vf.VertexElementIndex = i;
432 vf.InstancingEnable = false;
433 }
434 }
435
436 blorp_emit(brw, GENX(3DSTATE_VF_TOPOLOGY), topo) {
437 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
438 }
439 #endif
440 }
441
442 static void
443 blorp_emit_sf_config(struct brw_context *brw,
444 const struct brw_blorp_params *params)
445 {
446 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
447
448 /* 3DSTATE_SF
449 *
450 * Disable ViewportTransformEnable (dw2.1)
451 *
452 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
453 * Primitives Overview":
454 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
455 * use of screen- space coordinates).
456 *
457 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
458 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
459 *
460 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
461 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
462 * SOLID: Any triangle or rectangle object found to be front-facing
463 * is rendered as a solid object. This setting is required when
464 * (rendering rectangle (RECTLIST) objects.
465 */
466
467 #if GEN_GEN >= 8
468
469 blorp_emit(brw, GENX(3DSTATE_SF), sf);
470
471 blorp_emit(brw, GENX(3DSTATE_RASTER), raster) {
472 raster.CullMode = CULLMODE_NONE;
473 }
474
475 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
476 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
477 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
478 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
479 sbe.ForceVertexURBEntryReadLength = true;
480 sbe.ForceVertexURBEntryReadOffset = true;
481 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
482
483 #if GEN_GEN >= 9
484 for (unsigned i = 0; i < 32; i++)
485 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
486 #endif
487 }
488
489 #elif GEN_GEN >= 7
490
491 blorp_emit(brw, GENX(3DSTATE_SF), sf) {
492 sf.FrontFaceFillMode = FILL_MODE_SOLID;
493 sf.BackFaceFillMode = FILL_MODE_SOLID;
494
495 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
496 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
497
498 #if GEN_GEN == 7
499 sf.DepthBufferSurfaceFormat = params->depth_format;
500 #endif
501 }
502
503 blorp_emit(brw, GENX(3DSTATE_SBE), sbe) {
504 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
505 if (prog_data) {
506 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
507 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
508 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
509 } else {
510 sbe.NumberofSFOutputAttributes = 0;
511 sbe.VertexURBEntryReadLength = 1;
512 }
513 }
514
515 #else /* GEN_GEN <= 6 */
516
517 blorp_emit(brw, GENX(3DSTATE_SF), sf) {
518 sf.FrontFaceFillMode = FILL_MODE_SOLID;
519 sf.BackFaceFillMode = FILL_MODE_SOLID;
520
521 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
522 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
523
524 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
525 if (prog_data) {
526 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
527 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
528 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
529 } else {
530 sf.NumberofSFOutputAttributes = 0;
531 sf.VertexURBEntryReadLength = 1;
532 }
533 }
534
535 #endif /* GEN_GEN */
536 }
537
538 static void
539 blorp_emit_ps_config(struct brw_context *brw,
540 const struct brw_blorp_params *params)
541 {
542 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
543
544 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
545 * nonzero to prevent the GPU from hanging. While the documentation doesn't
546 * mention this explicitly, it notes that the valid range for the field is
547 * [1,39] = [2,40] threads, which excludes zero.
548 *
549 * To be safe (and to minimize extraneous code) we go ahead and fully
550 * configure the WM state whether or not there is a WM program.
551 */
552
553 #if GEN_GEN >= 8
554
555 blorp_emit(brw, GENX(3DSTATE_WM), wm);
556
557 blorp_emit(brw, GENX(3DSTATE_PS), ps) {
558 if (params->src.bo) {
559 ps.SamplerCount = 1; /* Up to 4 samplers */
560 ps.BindingTableEntryCount = 2;
561 } else {
562 ps.BindingTableEntryCount = 1;
563 }
564
565 ps.DispatchGRFStartRegisterForConstantSetupData0 =
566 prog_data->first_curbe_grf_0;
567 ps.DispatchGRFStartRegisterForConstantSetupData2 =
568 prog_data->first_curbe_grf_2;
569
570 ps._8PixelDispatchEnable = prog_data->dispatch_8;
571 ps._16PixelDispatchEnable = prog_data->dispatch_16;
572
573 ps.KernelStartPointer0 = params->wm_prog_kernel;
574 ps.KernelStartPointer2 =
575 params->wm_prog_kernel + prog_data->ksp_offset_2;
576
577 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
578 * it implicitly scales for different GT levels (which have some # of
579 * PSDs).
580 *
581 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
582 */
583 if (GEN_GEN >= 9)
584 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
585 else
586 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
587
588 switch (params->fast_clear_op) {
589 #if GEN_GEN >= 9
590 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
591 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
592 break;
593 case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
594 ps.RenderTargetResolveType = RESOLVE_FULL;
595 break;
596 #else
597 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
598 ps.RenderTargetResolveEnable = true;
599 break;
600 #endif
601 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
602 ps.RenderTargetFastClearEnable = true;
603 break;
604 }
605 }
606
607 blorp_emit(brw, GENX(3DSTATE_PS_EXTRA), psx) {
608 psx.PixelShaderValid = true;
609
610 if (params->src.bo)
611 psx.PixelShaderKillsPixel = true;
612
613 psx.AttributeEnable = prog_data->num_varying_inputs > 0;
614
615 if (prog_data && prog_data->persample_msaa_dispatch)
616 psx.PixelShaderIsPerSample = true;
617 }
618
619 #elif GEN_GEN >= 7
620
621 blorp_emit(brw, GENX(3DSTATE_WM), wm) {
622 switch (params->hiz_op) {
623 case GEN6_HIZ_OP_DEPTH_CLEAR:
624 wm.DepthBufferClear = true;
625 break;
626 case GEN6_HIZ_OP_DEPTH_RESOLVE:
627 wm.DepthBufferResolveEnable = true;
628 break;
629 case GEN6_HIZ_OP_HIZ_RESOLVE:
630 wm.HierarchicalDepthBufferResolveEnable = true;
631 break;
632 case GEN6_HIZ_OP_NONE:
633 break;
634 default:
635 unreachable("not reached");
636 }
637
638 if (prog_data)
639 wm.ThreadDispatchEnable = true;
640
641 if (params->src.bo)
642 wm.PixelShaderKillPixel = true;
643
644 if (params->dst.surf.samples > 1) {
645 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
646 wm.MultisampleDispatchMode =
647 (prog_data && prog_data->persample_msaa_dispatch) ?
648 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
649 } else {
650 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
651 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
652 }
653 }
654
655 blorp_emit(brw, GENX(3DSTATE_PS), ps) {
656 ps.MaximumNumberofThreads = brw->max_wm_threads - 1;
657
658 #if GEN_IS_HASWELL
659 ps.SampleMask = 1;
660 #endif
661
662 if (prog_data) {
663 ps.DispatchGRFStartRegisterforConstantSetupData0 =
664 prog_data->first_curbe_grf_0;
665 ps.DispatchGRFStartRegisterforConstantSetupData2 =
666 prog_data->first_curbe_grf_2;
667
668 ps.KernelStartPointer0 = params->wm_prog_kernel;
669 ps.KernelStartPointer2 =
670 params->wm_prog_kernel + prog_data->ksp_offset_2;
671
672 ps._8PixelDispatchEnable = prog_data->dispatch_8;
673 ps._16PixelDispatchEnable = prog_data->dispatch_16;
674
675 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
676 } else {
677 /* Gen7 hardware gets angry if we don't enable at least one dispatch
678 * mode, so just enable 16-pixel dispatch if we don't have a program.
679 */
680 ps._16PixelDispatchEnable = true;
681 }
682
683 if (params->src.bo)
684 ps.SamplerCount = 1; /* Up to 4 samplers */
685
686 switch (params->fast_clear_op) {
687 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
688 ps.RenderTargetResolveEnable = true;
689 break;
690 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
691 ps.RenderTargetFastClearEnable = true;
692 break;
693 }
694 }
695
696 #else /* GEN_GEN <= 6 */
697
698 blorp_emit(brw, GENX(3DSTATE_WM), wm) {
699 wm.MaximumNumberofThreads = brw->max_wm_threads - 1;
700
701 switch (params->hiz_op) {
702 case GEN6_HIZ_OP_DEPTH_CLEAR:
703 wm.DepthBufferClear = true;
704 break;
705 case GEN6_HIZ_OP_DEPTH_RESOLVE:
706 wm.DepthBufferResolveEnable = true;
707 break;
708 case GEN6_HIZ_OP_HIZ_RESOLVE:
709 wm.HierarchicalDepthBufferResolveEnable = true;
710 break;
711 case GEN6_HIZ_OP_NONE:
712 break;
713 default:
714 unreachable("not reached");
715 }
716
717 if (prog_data) {
718 wm.ThreadDispatchEnable = true;
719
720 wm.DispatchGRFStartRegisterforConstantSetupData0 =
721 prog_data->first_curbe_grf_0;
722 wm.DispatchGRFStartRegisterforConstantSetupData2 =
723 prog_data->first_curbe_grf_2;
724
725 wm.KernelStartPointer0 = params->wm_prog_kernel;
726 wm.KernelStartPointer2 =
727 params->wm_prog_kernel + prog_data->ksp_offset_2;
728
729 wm._8PixelDispatchEnable = prog_data->dispatch_8;
730 wm._16PixelDispatchEnable = prog_data->dispatch_16;
731
732 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
733 }
734
735 if (params->src.bo) {
736 wm.SamplerCount = 1; /* Up to 4 samplers */
737 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
738 }
739
740 if (params->dst.surf.samples > 1) {
741 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
742 wm.MultisampleDispatchMode =
743 (prog_data && prog_data->persample_msaa_dispatch) ?
744 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
745 } else {
746 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
747 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
748 }
749 }
750
751 #endif /* GEN_GEN */
752 }
753
754
755 static void
756 blorp_emit_depth_stencil_config(struct brw_context *brw,
757 const struct brw_blorp_params *params)
758 {
759 brw_emit_depth_stall_flushes(brw);
760
761 #if GEN_GEN >= 7
762 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
763 #else
764 const uint32_t mocs = 0;
765 #endif
766
767 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
768 switch (params->depth.surf.dim) {
769 case ISL_SURF_DIM_1D:
770 db.SurfaceType = SURFTYPE_1D;
771 break;
772 case ISL_SURF_DIM_2D:
773 db.SurfaceType = SURFTYPE_2D;
774 break;
775 case ISL_SURF_DIM_3D:
776 db.SurfaceType = SURFTYPE_3D;
777 break;
778 }
779
780 db.SurfaceFormat = params->depth_format;
781
782 #if GEN_GEN >= 7
783 db.DepthWriteEnable = true;
784 #endif
785
786 #if GEN_GEN <= 6
787 db.TiledSurface = true;
788 db.TileWalk = TILEWALK_YMAJOR;
789 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
790 db.SeparateStencilBufferEnable = true;
791 #endif
792
793 db.HierarchicalDepthBufferEnable = true;
794
795 db.Width = params->depth.surf.logical_level0_px.width - 1;
796 db.Height = params->depth.surf.logical_level0_px.height - 1;
797 db.RenderTargetViewExtent = db.Depth =
798 MAX2(params->depth.surf.logical_level0_px.depth,
799 params->depth.surf.logical_level0_px.array_len) - 1;
800
801 db.LOD = params->depth.view.base_level;
802 db.MinimumArrayElement = params->depth.view.base_array_layer;
803
804 db.SurfacePitch = params->depth.surf.row_pitch - 1;
805 db.SurfaceBaseAddress = (struct blorp_address) {
806 .buffer = params->depth.bo,
807 .read_domains = I915_GEM_DOMAIN_RENDER,
808 .write_domain = I915_GEM_DOMAIN_RENDER,
809 .offset = params->depth.offset,
810 };
811 db.DepthBufferMOCS = mocs;
812 }
813
814 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
815 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
816 hiz.SurfaceBaseAddress = (struct blorp_address) {
817 .buffer = params->depth.aux_bo,
818 .read_domains = I915_GEM_DOMAIN_RENDER,
819 .write_domain = I915_GEM_DOMAIN_RENDER,
820 .offset = params->depth.aux_offset,
821 };
822 hiz.HierarchicalDepthBufferMOCS = mocs;
823 }
824
825 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
826 }
827
828 static uint32_t
829 blorp_emit_blend_state(struct brw_context *brw,
830 const struct brw_blorp_params *params)
831 {
832 struct GENX(BLEND_STATE) blend;
833 memset(&blend, 0, sizeof(blend));
834
835 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
836 blend.Entry[i].PreBlendColorClampEnable = true;
837 blend.Entry[i].PostBlendColorClampEnable = true;
838 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
839
840 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
841 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
842 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
843 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
844 }
845
846 uint32_t offset;
847 void *state = blorp_alloc_dynamic_state(&brw->blorp,
848 AUB_TRACE_BLEND_STATE,
849 GENX(BLEND_STATE_length) * 4,
850 64, &offset);
851 GENX(BLEND_STATE_pack)(NULL, state, &blend);
852
853 #if GEN_GEN >= 7
854 blorp_emit(brw, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
855 sp.BlendStatePointer = offset;
856 #if GEN_GEN >= 8
857 sp.BlendStatePointerValid = true;
858 #endif
859 }
860 #endif
861
862 #if GEN_GEN >= 8
863 blorp_emit(brw, GENX(3DSTATE_PS_BLEND), ps_blend) {
864 ps_blend.HasWriteableRT = true;
865 }
866 #endif
867
868 return offset;
869 }
870
871 static uint32_t
872 blorp_emit_color_calc_state(struct brw_context *brw,
873 const struct brw_blorp_params *params)
874 {
875 uint32_t offset;
876 void *state = blorp_alloc_dynamic_state(&brw->blorp,
877 AUB_TRACE_CC_STATE,
878 GENX(COLOR_CALC_STATE_length) * 4,
879 64, &offset);
880 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
881
882 #if GEN_GEN >= 7
883 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
884 sp.ColorCalcStatePointer = offset;
885 #if GEN_GEN >= 8
886 sp.ColorCalcStatePointerValid = true;
887 #endif
888 }
889 #endif
890
891 return offset;
892 }
893
894 static uint32_t
895 blorp_emit_depth_stencil_state(struct brw_context *brw,
896 const struct brw_blorp_params *params)
897 {
898 #if GEN_GEN >= 8
899
900 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
901 blorp_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
902 return 0;
903
904 #else /* GEN_GEN <= 7 */
905
906 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
907 * - 7.5.3.1 Depth Buffer Clear
908 * - 7.5.3.2 Depth Buffer Resolve
909 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
910 */
911 struct GENX(DEPTH_STENCIL_STATE) ds = {
912 .DepthBufferWriteEnable = true,
913 };
914
915 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
916 ds.DepthTestEnable = true;
917 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
918 }
919
920 uint32_t offset;
921 void *state = blorp_alloc_dynamic_state(&brw->blorp,
922 AUB_TRACE_DEPTH_STENCIL_STATE,
923 GENX(DEPTH_STENCIL_STATE_length) * 4,
924 64, &offset);
925 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
926
927 #if GEN_GEN >= 7
928 blorp_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
929 sp.PointertoDEPTH_STENCIL_STATE = offset;
930 }
931 #endif
932
933 return offset;
934
935 #endif /* GEN_GEN */
936 }
937
938 static void
939 blorp_emit_surface_states(struct brw_context *brw,
940 const struct brw_blorp_params *params)
941 {
942 uint32_t bind_offset;
943 uint32_t *bind =
944 brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
945 sizeof(uint32_t) * BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
946 32, /* alignment */ &bind_offset);
947
948 bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
949 brw_blorp_emit_surface_state(brw, &params->dst,
950 I915_GEM_DOMAIN_RENDER,
951 I915_GEM_DOMAIN_RENDER, true);
952 if (params->src.bo) {
953 bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] =
954 brw_blorp_emit_surface_state(brw, &params->src,
955 I915_GEM_DOMAIN_SAMPLER, 0, false);
956 }
957
958 #if GEN_GEN >= 7
959 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
960 bt.PointertoPSBindingTable = bind_offset;
961 }
962 #else
963 blorp_emit(brw, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
964 bt.PSBindingTableChange = true;
965 bt.PointertoPSBindingTable = bind_offset;
966 }
967 #endif
968 }
969
970 static void
971 blorp_emit_sampler_state(struct brw_context *brw,
972 const struct brw_blorp_params *params)
973 {
974 struct GENX(SAMPLER_STATE) sampler = {
975 .MipModeFilter = MIPFILTER_NONE,
976 .MagModeFilter = MAPFILTER_LINEAR,
977 .MinModeFilter = MAPFILTER_LINEAR,
978 .MinLOD = 0,
979 .MaxLOD = 0,
980 .TCXAddressControlMode = TCM_CLAMP,
981 .TCYAddressControlMode = TCM_CLAMP,
982 .TCZAddressControlMode = TCM_CLAMP,
983 .MaximumAnisotropy = RATIO21,
984 .RAddressMinFilterRoundingEnable = true,
985 .RAddressMagFilterRoundingEnable = true,
986 .VAddressMinFilterRoundingEnable = true,
987 .VAddressMagFilterRoundingEnable = true,
988 .UAddressMinFilterRoundingEnable = true,
989 .UAddressMagFilterRoundingEnable = true,
990 .NonnormalizedCoordinateEnable = true,
991 };
992
993 uint32_t offset;
994 void *state = blorp_alloc_dynamic_state(&brw->blorp,
995 AUB_TRACE_SAMPLER_STATE,
996 GENX(SAMPLER_STATE_length) * 4,
997 32, &offset);
998 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
999
1000 #if GEN_GEN >= 7
1001 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1002 ssp.PointertoPSSamplerState = offset;
1003 }
1004 #else
1005 blorp_emit(brw, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1006 ssp.VSSamplerStateChange = true;
1007 ssp.GSSamplerStateChange = true;
1008 ssp.PSSamplerStateChange = true;
1009 ssp.PointertoPSSamplerState = offset;
1010 }
1011 #endif
1012 }
1013
1014 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1015 static void
1016 blorp_emit_viewport_state(struct brw_context *brw,
1017 const struct brw_blorp_params *params)
1018 {
1019 uint32_t cc_vp_offset;
1020
1021 void *state = blorp_alloc_dynamic_state(&brw->blorp,
1022 AUB_TRACE_CC_VP_STATE,
1023 GENX(CC_VIEWPORT_length) * 4, 32,
1024 &cc_vp_offset);
1025
1026 GENX(CC_VIEWPORT_pack)(brw, state,
1027 &(struct GENX(CC_VIEWPORT)) {
1028 .MinimumDepth = 0.0,
1029 .MaximumDepth = 1.0,
1030 });
1031
1032 #if GEN_GEN >= 7
1033 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1034 vsp.CCViewportPointer = cc_vp_offset;
1035 }
1036 #else
1037 blorp_emit(brw, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1038 vsp.CCViewportStateChange = true;
1039 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1040 }
1041 #endif
1042 }
1043
1044
1045 /**
1046 * \brief Execute a blit or render pass operation.
1047 *
1048 * To execute the operation, this function manually constructs and emits a
1049 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1050 * constructing and after emitting the batch.
1051 *
1052 * This function alters no GL state.
1053 */
1054 void
1055 genX(blorp_exec)(struct brw_context *brw,
1056 const struct brw_blorp_params *params)
1057 {
1058 uint32_t blend_state_offset = 0;
1059 uint32_t color_calc_state_offset = 0;
1060 uint32_t depth_stencil_state_offset;
1061
1062 #if GEN_GEN == 6
1063 /* Emit workaround flushes when we switch from drawing to blorping. */
1064 brw_emit_post_sync_nonzero_flush(brw);
1065 #endif
1066
1067 brw_upload_state_base_address(brw);
1068
1069 #if GEN_GEN >= 8
1070 gen7_l3_state.emit(brw);
1071 #endif
1072
1073 blorp_emit_vertex_buffers(brw, params);
1074 blorp_emit_vertex_elements(brw, params);
1075
1076 emit_urb_config(brw, params);
1077
1078 if (params->wm_prog_data) {
1079 blend_state_offset = blorp_emit_blend_state(brw, params);
1080 color_calc_state_offset = blorp_emit_color_calc_state(brw, params);
1081 }
1082 depth_stencil_state_offset = blorp_emit_depth_stencil_state(brw, params);
1083
1084 #if GEN_GEN <= 6
1085 /* 3DSTATE_CC_STATE_POINTERS
1086 *
1087 * The pointer offsets are relative to
1088 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1089 *
1090 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1091 *
1092 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1093 * gen7+. However, on gen6 and earlier, they're all lumpped together in
1094 * one CC_STATE_POINTERS packet so we have to emit that here.
1095 */
1096 blorp_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1097 cc.BLEND_STATEChange = true;
1098 cc.COLOR_CALC_STATEChange = true;
1099 cc.DEPTH_STENCIL_STATEChange = true;
1100 cc.PointertoBLEND_STATE = blend_state_offset;
1101 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1102 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1103 }
1104 #else
1105 (void)blend_state_offset;
1106 (void)color_calc_state_offset;
1107 (void)depth_stencil_state_offset;
1108 #endif
1109
1110 blorp_emit(brw, GENX(3DSTATE_CONSTANT_VS), vs);
1111 #if GEN_GEN >= 7
1112 blorp_emit(brw, GENX(3DSTATE_CONSTANT_HS), hs);
1113 blorp_emit(brw, GENX(3DSTATE_CONSTANT_DS), DS);
1114 #endif
1115 blorp_emit(brw, GENX(3DSTATE_CONSTANT_GS), gs);
1116 blorp_emit(brw, GENX(3DSTATE_CONSTANT_PS), ps);
1117
1118 if (brw->use_resource_streamer)
1119 gen7_disable_hw_binding_tables(brw);
1120
1121 if (params->wm_prog_data)
1122 blorp_emit_surface_states(brw, params);
1123
1124 if (params->src.bo)
1125 blorp_emit_sampler_state(brw, params);
1126
1127 blorp_emit_3dstate_multisample(brw, params->dst.surf.samples);
1128
1129 blorp_emit(brw, GENX(3DSTATE_SAMPLE_MASK), mask) {
1130 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1131 }
1132
1133 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1134 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1135 *
1136 * [DevSNB] A pipeline flush must be programmed prior to a
1137 * 3DSTATE_VS command that causes the VS Function Enable to
1138 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1139 * command with CS stall bit set and a post sync operation.
1140 *
1141 * We've already done one at the start of the BLORP operation.
1142 */
1143 blorp_emit(brw, GENX(3DSTATE_VS), vs);
1144 #if GEN_GEN >= 7
1145 blorp_emit(brw, GENX(3DSTATE_HS), hs);
1146 blorp_emit(brw, GENX(3DSTATE_TE), te);
1147 blorp_emit(brw, GENX(3DSTATE_DS), DS);
1148 blorp_emit(brw, GENX(3DSTATE_STREAMOUT), so);
1149 #endif
1150 blorp_emit(brw, GENX(3DSTATE_GS), gs);
1151
1152 blorp_emit(brw, GENX(3DSTATE_CLIP), clip) {
1153 clip.PerspectiveDivideDisable = true;
1154 }
1155
1156 blorp_emit_sf_config(brw, params);
1157 blorp_emit_ps_config(brw, params);
1158
1159 blorp_emit_viewport_state(brw, params);
1160
1161 if (params->depth.bo) {
1162 blorp_emit_depth_stencil_config(brw, params);
1163 } else {
1164 brw_emit_depth_stall_flushes(brw);
1165
1166 blorp_emit(brw, GENX(3DSTATE_DEPTH_BUFFER), db) {
1167 db.SurfaceType = SURFTYPE_NULL;
1168 db.SurfaceFormat = D32_FLOAT;
1169 }
1170 blorp_emit(brw, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1171 blorp_emit(brw, GENX(3DSTATE_STENCIL_BUFFER), sb);
1172 }
1173
1174 /* 3DSTATE_CLEAR_PARAMS
1175 *
1176 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1177 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1178 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1179 */
1180 blorp_emit(brw, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1181 clear.DepthClearValueValid = true;
1182 clear.DepthClearValue = params->depth.clear_color.u32[0];
1183 }
1184
1185 blorp_emit(brw, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1186 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1187 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1188 }
1189
1190 blorp_emit(brw, GENX(3DPRIMITIVE), prim) {
1191 prim.VertexAccessType = SEQUENTIAL;
1192 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1193 prim.VertexCountPerInstance = 3;
1194 prim.InstanceCount = params->num_layers;
1195 }
1196 }