i965/blorp/exec: Refactor to use a new blorp_batch struct
[mesa.git] / src / mesa / drivers / dri / i965 / genX_blorp_exec.c
1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25
26 #include "intel_batchbuffer.h"
27 #include "intel_mipmap_tree.h"
28
29 #include "brw_context.h"
30 #include "brw_state.h"
31
32 #include "blorp_priv.h"
33
34 #include "genxml/gen_macros.h"
35
36 static void *
37 blorp_emit_dwords(struct blorp_batch *batch, unsigned n)
38 {
39 assert(batch->blorp->driver_ctx == batch->driver_batch);
40 struct brw_context *brw = batch->driver_batch;
41
42 intel_batchbuffer_begin(brw, n, RENDER_RING);
43 uint32_t *map = brw->batch.map_next;
44 brw->batch.map_next += n;
45 intel_batchbuffer_advance(brw);
46 return map;
47 }
48
49 static uint64_t
50 blorp_emit_reloc(struct blorp_batch *batch,
51 void *location, struct blorp_address address, uint32_t delta)
52 {
53 assert(batch->blorp->driver_ctx == batch->driver_batch);
54 struct brw_context *brw = batch->driver_batch;
55
56 uint32_t offset = (char *)location - (char *)brw->batch.map;
57 if (brw->gen >= 8) {
58 return intel_batchbuffer_reloc64(brw, address.buffer, offset,
59 address.read_domains,
60 address.write_domain,
61 address.offset + delta);
62 } else {
63 return intel_batchbuffer_reloc(brw, address.buffer, offset,
64 address.read_domains,
65 address.write_domain,
66 address.offset + delta);
67 }
68 }
69
70 static void
71 blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
72 struct blorp_address address, uint32_t delta)
73 {
74 assert(batch->blorp->driver_ctx == batch->driver_batch);
75 struct brw_context *brw = batch->driver_batch;
76
77 drm_intel_bo_emit_reloc(brw->batch.bo, ss_offset,
78 address.buffer, address.offset + delta,
79 address.read_domains, address.write_domain);
80
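   /* Also write the buffer's presumed address directly into the surface
    * state below; the relocation recorded above lets the kernel patch the
    * value if the BO ends up at a different address.
    */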
81 uint64_t reloc_val = address.buffer->offset64 + address.offset + delta;
82 void *reloc_ptr = (void *)brw->batch.map + ss_offset;
83 #if GEN_GEN >= 8
84 *(uint64_t *)reloc_ptr = reloc_val;
85 #else
86 *(uint32_t *)reloc_ptr = reloc_val;
87 #endif
88 }
89
90 static void *
91 blorp_alloc_dynamic_state(struct blorp_batch *batch,
92 enum aub_state_struct_type type,
93 uint32_t size,
94 uint32_t alignment,
95 uint32_t *offset)
96 {
97 assert(batch->blorp->driver_ctx == batch->driver_batch);
98 struct brw_context *brw = batch->driver_batch;
99
100 return brw_state_batch(brw, type, size, alignment, offset);
101 }
102
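/* Allocate a binding table plus one surface state per entry from the batch's
 * state space.  Each surface state's batch offset is written into the
 * corresponding binding table slot via brw_state_batch().
 */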
103 static void
104 blorp_alloc_binding_table(struct blorp_batch *batch, unsigned num_entries,
105 unsigned state_size, unsigned state_alignment,
106 uint32_t *bt_offset, uint32_t **bt_map,
107 void **surface_maps)
108 {
109 assert(batch->blorp->driver_ctx == batch->driver_batch);
110 struct brw_context *brw = batch->driver_batch;
111
112 *bt_map = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
113 num_entries * sizeof(uint32_t), 32,
114 bt_offset);
115
116 for (unsigned i = 0; i < num_entries; i++) {
117 surface_maps[i] = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
118 state_size, state_alignment,
119 &(*bt_map)[i]);
120 }
121 }
122
123 static void *
124 blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
125 struct blorp_address *addr)
126 {
127 assert(batch->blorp->driver_ctx == batch->driver_batch);
128 struct brw_context *brw = batch->driver_batch;
129
130 uint32_t offset;
131 void *data = brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
132 size, 32, &offset);
133
134 *addr = (struct blorp_address) {
135 .buffer = brw->batch.bo,
136 .read_domains = I915_GEM_DOMAIN_VERTEX,
137 .write_domain = 0,
138 .offset = offset,
139 };
140
141 return data;
142 }
143
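/* Upload a URB configuration large enough for the given VS entry size.  On
 * gen7+ this is skipped when nothing URB-related is dirty and the current
 * allocation is already big enough; otherwise the URB size state is flagged
 * dirty and re-uploaded.
 */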
144 static void
145 blorp_emit_urb_config(struct blorp_batch *batch, unsigned vs_entry_size)
146 {
147 assert(batch->blorp->driver_ctx == batch->driver_batch);
148 struct brw_context *brw = batch->driver_batch;
149
150 #if GEN_GEN >= 7
151 if (!(brw->ctx.NewDriverState & (BRW_NEW_CONTEXT | BRW_NEW_URB_SIZE)) &&
152 brw->urb.vsize >= vs_entry_size)
153 return;
154
155 brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
156
157 gen7_upload_urb(brw, vs_entry_size, false, false);
158 #else
159 gen6_upload_urb(brw, vs_entry_size, false, 0);
160 #endif
161 }
162
163 static void
164 blorp_emit_3dstate_multisample(struct blorp_batch *batch, unsigned samples)
165 {
166 assert(batch->blorp->driver_ctx == batch->driver_batch);
167 struct brw_context *brw = batch->driver_batch;
168
169 #if GEN_GEN >= 8
170 gen8_emit_3dstate_multisample(brw, samples);
171 #else
172 gen6_emit_3dstate_multisample(brw, samples);
173 #endif
174 }
175
176 #define __gen_address_type struct blorp_address
177 #define __gen_user_data struct blorp_batch
178
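/* Address combining hook used by the genxml pack functions.  A NULL buffer
 * means the blorp_address is a plain offset with no BO behind it, so no
 * relocation needs to be emitted.
 */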
179 static uint64_t
180 __gen_combine_address(struct blorp_batch *batch, void *location,
181 struct blorp_address address, uint32_t delta)
182 {
183 if (address.buffer == NULL) {
184 return address.offset + delta;
185 } else {
186 return blorp_emit_reloc(batch, location, address, delta);
187 }
188 }
189
190 #include "genxml/genX_pack.h"
191
192 #define _blorp_cmd_length(cmd) cmd ## _length
193 #define _blorp_cmd_length_bias(cmd) cmd ## _length_bias
194 #define _blorp_cmd_header(cmd) cmd ## _header
195 #define _blorp_cmd_pack(cmd) cmd ## _pack
196
197 #define blorp_emit(batch, cmd, name) \
198 for (struct cmd name = { _blorp_cmd_header(cmd) }, \
199 *_dst = blorp_emit_dwords(batch, _blorp_cmd_length(cmd)); \
200 __builtin_expect(_dst != NULL, 1); \
201 _blorp_cmd_pack(cmd)(batch, (void *)_dst, &name), \
202 _dst = NULL)
203
204 #define blorp_emitn(batch, cmd, n) ({ \
205 uint32_t *_dw = blorp_emit_dwords(batch, n); \
206 struct cmd template = { \
207 _blorp_cmd_header(cmd), \
208 .DWordLength = n - _blorp_cmd_length_bias(cmd), \
209 }; \
210 _blorp_cmd_pack(cmd)(batch, _dw, &template); \
211 _dw + 1; /* Array starts at dw[1] */ \
212 })
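/* Usage sketch, mirroring the call sites later in this file: fixed-length
 * packets are emitted with blorp_emit(), e.g.
 *
 *    blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
 *       rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
 *    }
 *
 * Variable-length packets go through blorp_emitn(), which packs the header
 * (including DWordLength) and returns a pointer to dw[1] so the caller can
 * pack the trailing array of structures itself.
 */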
213
214 /* Once the vertex fetcher has written full VUE entries with complete
215 * headers, the space requirement per vertex (in bytes) is as follows:
216 *
217 *    Header    Position    Program constants
218 *   +--------+------------+-------------------+
219 *   |   16   |     16     |      n x 16       |
220 *   +--------+------------+-------------------+
221 *
222 * where 'n' is the number of varying inputs expressed as vec4s.
223 *
224 * The URB entry size is in turn expressed in multiples of 64 bytes (512 bits).
225 */
226 static inline unsigned
227 gen7_blorp_get_vs_entry_size(const struct brw_blorp_params *params)
228 {
229 const unsigned num_varyings =
230 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
231 const unsigned total_needed = 16 + 16 + num_varyings * 16;
232
233 return DIV_ROUND_UP(total_needed, 64);
234 }
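/* For example, with two varying vec4 inputs a vertex needs
 * 16 + 16 + 2 * 16 = 64 bytes, so gen7_blorp_get_vs_entry_size() returns
 * DIV_ROUND_UP(64, 64) = 1, i.e. one 64-byte URB allocation unit.
 */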
235
236 /* 3DSTATE_URB
237 * 3DSTATE_URB_VS
238 * 3DSTATE_URB_HS
239 * 3DSTATE_URB_DS
240 * 3DSTATE_URB_GS
241 *
242 * Assign the entire URB to the VS. Even though the VS is disabled, URB space
243 * is still needed because the clipper loads the VUEs from the URB. From
244 * the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
245 * Dword 1.15:0 "VS Number of URB Entries":
246 * This field is always used (even if VS Function Enable is DISABLED).
247 *
248 * The warning below appears in the PRM (Section 3DSTATE_URB), but we can
249 * safely ignore it because this batch contains only one draw call.
250 * Because of URB corruption caused by allocating a previous GS unit
251 * URB entry to the VS unit, software is required to send a “GS NULL
252 * Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
253 * plus a dummy DRAW call before any case where VS will be taking over
254 * GS URB space.
255 *
256 * If 3DSTATE_URB_VS is emitted, then the others must be emitted as well.
257 * From the Ivybridge PRM, Volume 2 Part 1, section 1.7.1 3DSTATE_URB_VS:
258 *
259 * 3DSTATE_URB_HS, 3DSTATE_URB_DS, and 3DSTATE_URB_GS must also be
260 * programmed in order for the programming of this state to be
261 * valid.
262 */
263 static void
264 emit_urb_config(struct blorp_batch *batch,
265 const struct brw_blorp_params *params)
266 {
267 blorp_emit_urb_config(batch, gen7_blorp_get_vs_entry_size(params));
268 }
269
270 static void
271 blorp_emit_vertex_data(struct blorp_batch *batch,
272 const struct brw_blorp_params *params,
273 struct blorp_address *addr,
274 uint32_t *size)
275 {
276 const float vertices[] = {
277 /* v0 */ (float)params->x0, (float)params->y1,
278 /* v1 */ (float)params->x1, (float)params->y1,
279 /* v2 */ (float)params->x0, (float)params->y0,
280 };
281
282 void *data = blorp_alloc_vertex_buffer(batch, sizeof(vertices), addr);
283 memcpy(data, vertices, sizeof(vertices));
284 *size = sizeof(vertices);
285 }
286
287 static void
288 blorp_emit_input_varying_data(struct blorp_batch *batch,
289 const struct brw_blorp_params *params,
290 struct blorp_address *addr,
291 uint32_t *size)
292 {
293 const unsigned vec4_size_in_bytes = 4 * sizeof(float);
294 const unsigned max_num_varyings =
295 DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
296 const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
297
298 *size = num_varyings * vec4_size_in_bytes;
299
300 const float *const inputs_src = (const float *)&params->wm_inputs;
301 float *inputs = blorp_alloc_vertex_buffer(batch, *size, addr);
302
303 /* Walk over the attribute slots, determine whether each attribute is used
304 * by the program and, if so, copy its values from the input storage to
305 * the vertex data buffer.
306 */
307 for (unsigned i = 0; i < max_num_varyings; i++) {
308 const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
309
310 if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
311 continue;
312
313 memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
314
315 inputs += 4;
316 }
317 }
318
319 static void
320 blorp_emit_vertex_buffers(struct blorp_batch *batch,
321 const struct brw_blorp_params *params)
322 {
323 struct GENX(VERTEX_BUFFER_STATE) vb[2];
324 memset(vb, 0, sizeof(vb));
325
326 unsigned num_buffers = 1;
327
328 uint32_t size;
329 blorp_emit_vertex_data(batch, params, &vb[0].BufferStartingAddress, &size);
330 vb[0].VertexBufferIndex = 0;
331 vb[0].BufferPitch = 2 * sizeof(float);
332 vb[0].VertexBufferMOCS = batch->blorp->mocs.vb;
333 #if GEN_GEN >= 7
334 vb[0].AddressModifyEnable = true;
335 #endif
336 #if GEN_GEN >= 8
337 vb[0].BufferSize = size;
338 #else
339 vb[0].BufferAccessType = VERTEXDATA;
340 vb[0].EndAddress = vb[0].BufferStartingAddress;
341 vb[0].EndAddress.offset += size - 1;
342 #endif
343
344 if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs) {
345 blorp_emit_input_varying_data(batch, params,
346 &vb[1].BufferStartingAddress, &size);
347 vb[1].VertexBufferIndex = 1;
348 vb[1].BufferPitch = 0;
349 vb[1].VertexBufferMOCS = batch->blorp->mocs.vb;
350 #if GEN_GEN >= 7
351 vb[1].AddressModifyEnable = true;
352 #endif
353 #if GEN_GEN >= 8
354 vb[1].BufferSize = size;
355 #else
356 vb[1].BufferAccessType = INSTANCEDATA;
357 vb[1].EndAddress = vb[1].BufferStartingAddress;
358 vb[1].EndAddress.offset += size - 1;
359 #endif
360 num_buffers++;
361 }
362
363 const unsigned num_dwords =
364 1 + GENX(VERTEX_BUFFER_STATE_length) * num_buffers;
365 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
366
367 for (unsigned i = 0; i < num_buffers; i++) {
368 GENX(VERTEX_BUFFER_STATE_pack)(batch, dw, &vb[i]);
369 dw += GENX(VERTEX_BUFFER_STATE_length);
370 }
371 }
372
373 static void
374 blorp_emit_vertex_elements(struct blorp_batch *batch,
375 const struct brw_blorp_params *params)
376 {
377 const unsigned num_varyings =
378 params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
379 const unsigned num_elements = 2 + num_varyings;
380
381 struct GENX(VERTEX_ELEMENT_STATE) ve[num_elements];
382 memset(ve, 0, num_elements * sizeof(*ve));
383
384 /* Set up the VBO for the rectangle primitive.
385 *
386 * A rectangle primitive (3DPRIM_RECTLIST) consists of only three
387 * vertices. The vertices reside in screen space with DirectX
388 * coordinates (that is, (0, 0) is the upper left corner).
389 *
390 *   v2 ------ implied
391 *    |        |
392 *    |        |
393 *   v0 ----- v1
394 *
395 * Since the VS is disabled, the clipper loads each VUE directly from
396 * the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
397 * 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
398 * dw0: Reserved, MBZ.
399 * dw1: Render Target Array Index. The HiZ op does not use indexed
400 * vertices, so set the dword to 0.
401 * dw2: Viewport Index. The HiZ op disables viewport mapping and
402 * scissoring, so set the dword to 0.
403 * dw3: Point Width: The HiZ op does not emit the POINTLIST primitive,
404 * so set the dword to 0.
405 * dw4: Vertex Position X.
406 * dw5: Vertex Position Y.
407 * dw6: Vertex Position Z.
408 * dw7: Vertex Position W.
409 *
410 * dw8: Flat vertex input 0
411 * dw9: Flat vertex input 1
412 * ...
413 * dwn: Flat vertex input n - 8
414 *
415 * For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
416 * "Vertex URB Entry (VUE) Formats".
417 *
418 * Only the vertex position X and Y are variable; Z is fixed to zero and
419 * W to one. Header words dw0-3 are all zero. There is no need to include
420 * these fixed values in the vertex buffer: the vertex fetcher can be
421 * instructed to fill the corresponding vertex elements with constant
422 * values of zero and one instead of reading them from the buffer.
423 * Flat inputs are program constants that are not interpolated, so their
424 * values are the same for every vertex.
425 *
426 * See the vertex element setup below.
427 */
428 ve[0].VertexBufferIndex = 0;
429 ve[0].Valid = true;
430 ve[0].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
431 ve[0].SourceElementOffset = 0;
432 ve[0].Component0Control = VFCOMP_STORE_0;
433 ve[0].Component1Control = VFCOMP_STORE_0;
434 ve[0].Component2Control = VFCOMP_STORE_0;
435 ve[0].Component3Control = VFCOMP_STORE_0;
436
437 ve[1].VertexBufferIndex = 0;
438 ve[1].Valid = true;
439 ve[1].SourceElementFormat = ISL_FORMAT_R32G32_FLOAT;
440 ve[1].SourceElementOffset = 0;
441 ve[1].Component0Control = VFCOMP_STORE_SRC;
442 ve[1].Component1Control = VFCOMP_STORE_SRC;
443 ve[1].Component2Control = VFCOMP_STORE_0;
444 ve[1].Component3Control = VFCOMP_STORE_1_FP;
445
446 for (unsigned i = 0; i < num_varyings; ++i) {
447 ve[i + 2].VertexBufferIndex = 1;
448 ve[i + 2].Valid = true;
449 ve[i + 2].SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT;
450 ve[i + 2].SourceElementOffset = i * 4 * sizeof(float);
451 ve[i + 2].Component0Control = VFCOMP_STORE_SRC;
452 ve[i + 2].Component1Control = VFCOMP_STORE_SRC;
453 ve[i + 2].Component2Control = VFCOMP_STORE_SRC;
454 ve[i + 2].Component3Control = VFCOMP_STORE_SRC;
455 }
456
457 const unsigned num_dwords =
458 1 + GENX(VERTEX_ELEMENT_STATE_length) * num_elements;
459 uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_ELEMENTS), num_dwords);
460
461 for (unsigned i = 0; i < num_elements; i++) {
462 GENX(VERTEX_ELEMENT_STATE_pack)(batch, dw, &ve[i]);
463 dw += GENX(VERTEX_ELEMENT_STATE_length);
464 }
465
466 #if GEN_GEN >= 8
467 blorp_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
468
469 for (unsigned i = 0; i < num_elements; i++) {
470 blorp_emit(batch, GENX(3DSTATE_VF_INSTANCING), vf) {
471 vf.VertexElementIndex = i;
472 vf.InstancingEnable = false;
473 }
474 }
475
476 blorp_emit(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
477 topo.PrimitiveTopologyType = _3DPRIM_RECTLIST;
478 }
479 #endif
480 }
481
482 static void
483 blorp_emit_sf_config(struct blorp_batch *batch,
484 const struct brw_blorp_params *params)
485 {
486 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
487
488 /* 3DSTATE_SF
489 *
490 * Disable ViewportTransformEnable (dw2.1)
491 *
492 * From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
493 * Primitives Overview":
494 * RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
495 * use of screen-space coordinates).
496 *
497 * A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
498 * and BackFaceFillMode (dw2.5:6) to SOLID(0).
499 *
500 * From the Sandy Bridge PRM, Volume 2, Part 1, Section
501 * 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
502 * SOLID: Any triangle or rectangle object found to be front-facing
503 * is rendered as a solid object. This setting is required when
504 * rendering rectangle (RECTLIST) objects.
505 */
506
507 #if GEN_GEN >= 8
508
509 blorp_emit(batch, GENX(3DSTATE_SF), sf);
510
511 blorp_emit(batch, GENX(3DSTATE_RASTER), raster) {
512 raster.CullMode = CULLMODE_NONE;
513 }
514
515 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
516 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
517 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
518 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
519 sbe.ForceVertexURBEntryReadLength = true;
520 sbe.ForceVertexURBEntryReadOffset = true;
521 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
522
523 #if GEN_GEN >= 9
524 for (unsigned i = 0; i < 32; i++)
525 sbe.AttributeActiveComponentFormat[i] = ACF_XYZW;
526 #endif
527 }
528
529 #elif GEN_GEN >= 7
530
531 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
532 sf.FrontFaceFillMode = FILL_MODE_SOLID;
533 sf.BackFaceFillMode = FILL_MODE_SOLID;
534
535 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
536 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
537
538 #if GEN_GEN == 7
539 sf.DepthBufferSurfaceFormat = params->depth_format;
540 #endif
541 }
542
543 blorp_emit(batch, GENX(3DSTATE_SBE), sbe) {
544 sbe.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
545 if (prog_data) {
546 sbe.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
547 sbe.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
548 sbe.ConstantInterpolationEnable = prog_data->flat_inputs;
549 } else {
550 sbe.NumberofSFOutputAttributes = 0;
551 sbe.VertexURBEntryReadLength = 1;
552 }
553 }
554
555 #else /* GEN_GEN <= 6 */
556
557 blorp_emit(batch, GENX(3DSTATE_SF), sf) {
558 sf.FrontFaceFillMode = FILL_MODE_SOLID;
559 sf.BackFaceFillMode = FILL_MODE_SOLID;
560
561 sf.MultisampleRasterizationMode = params->dst.surf.samples > 1 ?
562 MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
563
564 sf.VertexURBEntryReadOffset = BRW_SF_URB_ENTRY_READ_OFFSET;
565 if (prog_data) {
566 sf.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
567 sf.VertexURBEntryReadLength = brw_blorp_get_urb_length(prog_data);
568 sf.ConstantInterpolationEnable = prog_data->flat_inputs;
569 } else {
570 sf.NumberofSFOutputAttributes = 0;
571 sf.VertexURBEntryReadLength = 1;
572 }
573 }
574
575 #endif /* GEN_GEN */
576 }
577
578 static void
579 blorp_emit_ps_config(struct blorp_batch *batch,
580 const struct brw_blorp_params *params)
581 {
582 const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
583
584 /* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
585 * nonzero to prevent the GPU from hanging. While the documentation doesn't
586 * mention this explicitly, it notes that the valid range for the field is
587 * [1,39], which corresponds to [2,40] threads and so excludes zero.
588 *
589 * To be safe (and to minimize extraneous code) we go ahead and fully
590 * configure the WM state whether or not there is a WM program.
591 */
592
593 #if GEN_GEN >= 8
594
595 blorp_emit(batch, GENX(3DSTATE_WM), wm);
596
597 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
598 if (params->src.addr.buffer) {
599 ps.SamplerCount = 1; /* Up to 4 samplers */
600 ps.BindingTableEntryCount = 2;
601 } else {
602 ps.BindingTableEntryCount = 1;
603 }
604
605 ps.DispatchGRFStartRegisterForConstantSetupData0 =
606 prog_data->first_curbe_grf_0;
607 ps.DispatchGRFStartRegisterForConstantSetupData2 =
608 prog_data->first_curbe_grf_2;
609
610 ps._8PixelDispatchEnable = prog_data->dispatch_8;
611 ps._16PixelDispatchEnable = prog_data->dispatch_16;
612
613 ps.KernelStartPointer0 = params->wm_prog_kernel;
614 ps.KernelStartPointer2 =
615 params->wm_prog_kernel + prog_data->ksp_offset_2;
616
617 /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
618 * it implicitly scales for different GT levels (which have some # of
619 * PSDs).
620 *
621 * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
622 */
623 if (GEN_GEN >= 9)
624 ps.MaximumNumberofThreadsPerPSD = 64 - 1;
625 else
626 ps.MaximumNumberofThreadsPerPSD = 64 - 2;
627
628 switch (params->fast_clear_op) {
629 #if GEN_GEN >= 9
630 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
631 ps.RenderTargetResolveType = RESOLVE_PARTIAL;
632 break;
633 case (3 << 6): /* GEN9_PS_RENDER_TARGET_RESOLVE_FULL */
634 ps.RenderTargetResolveType = RESOLVE_FULL;
635 break;
636 #else
637 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
638 ps.RenderTargetResolveEnable = true;
639 break;
640 #endif
641 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
642 ps.RenderTargetFastClearEnable = true;
643 break;
644 }
645 }
646
647 blorp_emit(batch, GENX(3DSTATE_PS_EXTRA), psx) {
648 psx.PixelShaderValid = true;
649
650 if (params->src.addr.buffer)
651 psx.PixelShaderKillsPixel = true;
652
653 psx.AttributeEnable = prog_data && prog_data->num_varying_inputs > 0;
654
655 if (prog_data && prog_data->persample_msaa_dispatch)
656 psx.PixelShaderIsPerSample = true;
657 }
658
659 #elif GEN_GEN >= 7
660
661 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
662 switch (params->hiz_op) {
663 case GEN6_HIZ_OP_DEPTH_CLEAR:
664 wm.DepthBufferClear = true;
665 break;
666 case GEN6_HIZ_OP_DEPTH_RESOLVE:
667 wm.DepthBufferResolveEnable = true;
668 break;
669 case GEN6_HIZ_OP_HIZ_RESOLVE:
670 wm.HierarchicalDepthBufferResolveEnable = true;
671 break;
672 case GEN6_HIZ_OP_NONE:
673 break;
674 default:
675 unreachable("not reached");
676 }
677
678 if (prog_data)
679 wm.ThreadDispatchEnable = true;
680
681 if (params->src.addr.buffer)
682 wm.PixelShaderKillPixel = true;
683
684 if (params->dst.surf.samples > 1) {
685 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
686 wm.MultisampleDispatchMode =
687 (prog_data && prog_data->persample_msaa_dispatch) ?
688 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
689 } else {
690 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
691 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
692 }
693 }
694
695 blorp_emit(batch, GENX(3DSTATE_PS), ps) {
696 ps.MaximumNumberofThreads =
697 batch->blorp->isl_dev->info->max_wm_threads - 1;
698
699 #if GEN_IS_HASWELL
700 ps.SampleMask = 1;
701 #endif
702
703 if (prog_data) {
704 ps.DispatchGRFStartRegisterforConstantSetupData0 =
705 prog_data->first_curbe_grf_0;
706 ps.DispatchGRFStartRegisterforConstantSetupData2 =
707 prog_data->first_curbe_grf_2;
708
709 ps.KernelStartPointer0 = params->wm_prog_kernel;
710 ps.KernelStartPointer2 =
711 params->wm_prog_kernel + prog_data->ksp_offset_2;
712
713 ps._8PixelDispatchEnable = prog_data->dispatch_8;
714 ps._16PixelDispatchEnable = prog_data->dispatch_16;
715
716 ps.AttributeEnable = prog_data->num_varying_inputs > 0;
717 } else {
718 /* Gen7 hardware gets angry if we don't enable at least one dispatch
719 * mode, so just enable 16-pixel dispatch if we don't have a program.
720 */
721 ps._16PixelDispatchEnable = true;
722 }
723
724 if (params->src.addr.buffer)
725 ps.SamplerCount = 1; /* Up to 4 samplers */
726
727 switch (params->fast_clear_op) {
728 case (1 << 6): /* GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE */
729 ps.RenderTargetResolveEnable = true;
730 break;
731 case (1 << 8): /* GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE */
732 ps.RenderTargetFastClearEnable = true;
733 break;
734 }
735 }
736
737 #else /* GEN_GEN <= 6 */
738
739 blorp_emit(batch, GENX(3DSTATE_WM), wm) {
740 wm.MaximumNumberofThreads =
741 batch->blorp->isl_dev->info->max_wm_threads - 1;
742
743 switch (params->hiz_op) {
744 case GEN6_HIZ_OP_DEPTH_CLEAR:
745 wm.DepthBufferClear = true;
746 break;
747 case GEN6_HIZ_OP_DEPTH_RESOLVE:
748 wm.DepthBufferResolveEnable = true;
749 break;
750 case GEN6_HIZ_OP_HIZ_RESOLVE:
751 wm.HierarchicalDepthBufferResolveEnable = true;
752 break;
753 case GEN6_HIZ_OP_NONE:
754 break;
755 default:
756 unreachable("not reached");
757 }
758
759 if (prog_data) {
760 wm.ThreadDispatchEnable = true;
761
762 wm.DispatchGRFStartRegisterforConstantSetupData0 =
763 prog_data->first_curbe_grf_0;
764 wm.DispatchGRFStartRegisterforConstantSetupData2 =
765 prog_data->first_curbe_grf_2;
766
767 wm.KernelStartPointer0 = params->wm_prog_kernel;
768 wm.KernelStartPointer2 =
769 params->wm_prog_kernel + prog_data->ksp_offset_2;
770
771 wm._8PixelDispatchEnable = prog_data->dispatch_8;
772 wm._16PixelDispatchEnable = prog_data->dispatch_16;
773
774 wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
775 }
776
777 if (params->src.addr.buffer) {
778 wm.SamplerCount = 1; /* Up to 4 samplers */
779 wm.PixelShaderKillPixel = true; /* TODO: temporarily smash on */
780 }
781
782 if (params->dst.surf.samples > 1) {
783 wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
784 wm.MultisampleDispatchMode =
785 (prog_data && prog_data->persample_msaa_dispatch) ?
786 MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
787 } else {
788 wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
789 wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
790 }
791 }
792
793 #endif /* GEN_GEN */
794 }
795
796
797 static void
798 blorp_emit_depth_stencil_config(struct blorp_batch *batch,
799 const struct brw_blorp_params *params)
800 {
801 #if GEN_GEN >= 7
802 const uint32_t mocs = 1; /* GEN7_MOCS_L3 */
803 #else
804 const uint32_t mocs = 0;
805 #endif
806
807 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
808 switch (params->depth.surf.dim) {
809 case ISL_SURF_DIM_1D:
810 db.SurfaceType = SURFTYPE_1D;
811 break;
812 case ISL_SURF_DIM_2D:
813 db.SurfaceType = SURFTYPE_2D;
814 break;
815 case ISL_SURF_DIM_3D:
816 db.SurfaceType = SURFTYPE_3D;
817 break;
818 }
819
820 db.SurfaceFormat = params->depth_format;
821
822 #if GEN_GEN >= 7
823 db.DepthWriteEnable = true;
824 #endif
825
826 #if GEN_GEN <= 6
827 db.TiledSurface = true;
828 db.TileWalk = TILEWALK_YMAJOR;
829 db.MIPMapLayoutMode = MIPLAYOUT_BELOW;
830 db.SeparateStencilBufferEnable = true;
831 #endif
832
833 db.HierarchicalDepthBufferEnable = true;
834
835 db.Width = params->depth.surf.logical_level0_px.width - 1;
836 db.Height = params->depth.surf.logical_level0_px.height - 1;
837 db.RenderTargetViewExtent = db.Depth =
838 MAX2(params->depth.surf.logical_level0_px.depth,
839 params->depth.surf.logical_level0_px.array_len) - 1;
840
841 db.LOD = params->depth.view.base_level;
842 db.MinimumArrayElement = params->depth.view.base_array_layer;
843
844 db.SurfacePitch = params->depth.surf.row_pitch - 1;
845 db.SurfaceBaseAddress = params->depth.addr;
846 db.DepthBufferMOCS = mocs;
847 }
848
849 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz) {
850 hiz.SurfacePitch = params->depth.aux_surf.row_pitch - 1;
851 hiz.SurfaceBaseAddress = params->depth.aux_addr;
852 hiz.HierarchicalDepthBufferMOCS = mocs;
853 }
854
855 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
856 }
857
858 static uint32_t
859 blorp_emit_blend_state(struct blorp_batch *batch,
860 const struct brw_blorp_params *params)
861 {
862 struct GENX(BLEND_STATE) blend;
863 memset(&blend, 0, sizeof(blend));
864
865 for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
866 blend.Entry[i].PreBlendColorClampEnable = true;
867 blend.Entry[i].PostBlendColorClampEnable = true;
868 blend.Entry[i].ColorClampRange = COLORCLAMP_RTFORMAT;
869
870 blend.Entry[i].WriteDisableRed = params->color_write_disable[0];
871 blend.Entry[i].WriteDisableGreen = params->color_write_disable[1];
872 blend.Entry[i].WriteDisableBlue = params->color_write_disable[2];
873 blend.Entry[i].WriteDisableAlpha = params->color_write_disable[3];
874 }
875
876 uint32_t offset;
877 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_BLEND_STATE,
878 GENX(BLEND_STATE_length) * 4,
879 64, &offset);
880 GENX(BLEND_STATE_pack)(NULL, state, &blend);
881
882 #if GEN_GEN >= 7
883 blorp_emit(batch, GENX(3DSTATE_BLEND_STATE_POINTERS), sp) {
884 sp.BlendStatePointer = offset;
885 #if GEN_GEN >= 8
886 sp.BlendStatePointerValid = true;
887 #endif
888 }
889 #endif
890
891 #if GEN_GEN >= 8
892 blorp_emit(batch, GENX(3DSTATE_PS_BLEND), ps_blend) {
893 ps_blend.HasWriteableRT = true;
894 }
895 #endif
896
897 return offset;
898 }
899
900 static uint32_t
901 blorp_emit_color_calc_state(struct blorp_batch *batch,
902 const struct brw_blorp_params *params)
903 {
904 uint32_t offset;
905 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_STATE,
906 GENX(COLOR_CALC_STATE_length) * 4,
907 64, &offset);
908 memset(state, 0, GENX(COLOR_CALC_STATE_length) * 4);
909
910 #if GEN_GEN >= 7
911 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), sp) {
912 sp.ColorCalcStatePointer = offset;
913 #if GEN_GEN >= 8
914 sp.ColorCalcStatePointerValid = true;
915 #endif
916 }
917 #endif
918
919 return offset;
920 }
921
922 static uint32_t
923 blorp_emit_depth_stencil_state(struct blorp_batch *batch,
924 const struct brw_blorp_params *params)
925 {
926 #if GEN_GEN >= 8
927
928 /* On gen8+, DEPTH_STENCIL state is simply an instruction */
929 blorp_emit(batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds);
930 return 0;
931
932 #else /* GEN_GEN <= 7 */
933
934 /* See the following sections of the Sandy Bridge PRM, Volume 1, Part 2:
935 * - 7.5.3.1 Depth Buffer Clear
936 * - 7.5.3.2 Depth Buffer Resolve
937 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
938 */
939 struct GENX(DEPTH_STENCIL_STATE) ds = {
940 .DepthBufferWriteEnable = true,
941 };
942
943 if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
944 ds.DepthTestEnable = true;
945 ds.DepthTestFunction = COMPAREFUNCTION_NEVER;
946 }
947
948 uint32_t offset;
949 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_DEPTH_STENCIL_STATE,
950 GENX(DEPTH_STENCIL_STATE_length) * 4,
951 64, &offset);
952 GENX(DEPTH_STENCIL_STATE_pack)(NULL, state, &ds);
953
954 #if GEN_GEN >= 7
955 blorp_emit(batch, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), sp) {
956 sp.PointertoDEPTH_STENCIL_STATE = offset;
957 }
958 #endif
959
960 return offset;
961
962 #endif /* GEN_GEN */
963 }
964
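/* Per-gen layout facts about RENDER_SURFACE_STATE: its length in dwords, the
 * required alignment of the state in bytes, and the dword indices at which
 * the surface and auxiliary-surface base addresses live.  The reloc_dw and
 * aux_reloc_dw values are what blorp_emit_surface_state() uses below to know
 * where to emit relocations.
 */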
965 struct surface_state_info {
966 unsigned num_dwords;
967 unsigned ss_align; /* Required alignment of RENDER_SURFACE_STATE in bytes */
968 unsigned reloc_dw;
969 unsigned aux_reloc_dw;
970 };
971
972 static const struct surface_state_info surface_state_infos[] = {
973 [6] = {6, 32, 1, 0},
974 [7] = {8, 32, 1, 6},
975 [8] = {13, 64, 8, 10},
976 [9] = {16, 64, 8, 10},
977 };
978
979 static void
980 blorp_emit_surface_state(struct blorp_batch *batch,
981 const struct brw_blorp_surface_info *surface,
982 uint32_t *state, uint32_t state_offset,
983 bool is_render_target)
984 {
985 const struct surface_state_info ss_info = surface_state_infos[GEN_GEN];
986
987 struct isl_surf surf = surface->surf;
988
989 if (surf.dim == ISL_SURF_DIM_1D &&
990 surf.dim_layout == ISL_DIM_LAYOUT_GEN4_2D) {
991 assert(surf.logical_level0_px.height == 1);
992 surf.dim = ISL_SURF_DIM_2D;
993 }
994
995 /* Blorp doesn't support HiZ in any of the blit or slow-clear paths */
996 enum isl_aux_usage aux_usage = surface->aux_usage;
997 if (aux_usage == ISL_AUX_USAGE_HIZ)
998 aux_usage = ISL_AUX_USAGE_NONE;
999
1000 const uint32_t mocs =
1001 is_render_target ? batch->blorp->mocs.rb : batch->blorp->mocs.tex;
1002
1003 isl_surf_fill_state(batch->blorp->isl_dev, state,
1004 .surf = &surf, .view = &surface->view,
1005 .aux_surf = &surface->aux_surf, .aux_usage = aux_usage,
1006 .mocs = mocs, .clear_color = surface->clear_color,
1007 .x_offset_sa = surface->tile_x_sa,
1008 .y_offset_sa = surface->tile_y_sa);
1009
1010 blorp_surface_reloc(batch, state_offset + ss_info.reloc_dw * 4,
1011 surface->addr, 0);
1012
1013 if (aux_usage != ISL_AUX_USAGE_NONE) {
1014 /* On gen7 and prior, the bottom 12 bits of the MCS base address are
1015 * used to store other information. This should be ok, however, because
1016 * surface buffer addresses are always 4K page aligned.
1017 */
1018 assert((surface->aux_addr.offset & 0xfff) == 0);
1019 blorp_surface_reloc(batch, state_offset + ss_info.aux_reloc_dw * 4,
1020 surface->aux_addr, state[ss_info.aux_reloc_dw]);
1021 }
1022 }
1023
1024 static void
1025 blorp_emit_surface_states(struct blorp_batch *batch,
1026 const struct brw_blorp_params *params)
1027 {
1028 uint32_t bind_offset, *bind_map;
1029 void *surface_maps[2];
1030
1031 const unsigned ss_size = GENX(RENDER_SURFACE_STATE_length) * 4;
1032 const unsigned ss_align = GENX(RENDER_SURFACE_STATE_length) > 8 ? 64 : 32;
1033
1034 unsigned num_surfaces = 1 + (params->src.addr.buffer != NULL);
1035 blorp_alloc_binding_table(batch, num_surfaces, ss_size, ss_align,
1036 &bind_offset, &bind_map, surface_maps);
1037
1038 blorp_emit_surface_state(batch, &params->dst,
1039 surface_maps[BLORP_RENDERBUFFER_BT_INDEX],
1040 bind_map[BLORP_RENDERBUFFER_BT_INDEX], true);
1041 if (params->src.addr.buffer) {
1042 blorp_emit_surface_state(batch, &params->src,
1043 surface_maps[BLORP_TEXTURE_BT_INDEX],
1044 bind_map[BLORP_TEXTURE_BT_INDEX], false);
1045 }
1046
1047 #if GEN_GEN >= 7
1048 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
1049 bt.PointertoPSBindingTable = bind_offset;
1050 }
1051 #else
1052 blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS), bt) {
1053 bt.PSBindingTableChange = true;
1054 bt.PointertoPSBindingTable = bind_offset;
1055 }
1056 #endif
1057 }
1058
1059 static void
1060 blorp_emit_sampler_state(struct blorp_batch *batch,
1061 const struct brw_blorp_params *params)
1062 {
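   /* A single fixed sampler configuration is enough here: linear min/mag
    * filtering, no mipmapping, clamped (TCM_CLAMP) addressing on all axes,
    * and non-normalized texel coordinates.
    */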
1063 struct GENX(SAMPLER_STATE) sampler = {
1064 .MipModeFilter = MIPFILTER_NONE,
1065 .MagModeFilter = MAPFILTER_LINEAR,
1066 .MinModeFilter = MAPFILTER_LINEAR,
1067 .MinLOD = 0,
1068 .MaxLOD = 0,
1069 .TCXAddressControlMode = TCM_CLAMP,
1070 .TCYAddressControlMode = TCM_CLAMP,
1071 .TCZAddressControlMode = TCM_CLAMP,
1072 .MaximumAnisotropy = RATIO21,
1073 .RAddressMinFilterRoundingEnable = true,
1074 .RAddressMagFilterRoundingEnable = true,
1075 .VAddressMinFilterRoundingEnable = true,
1076 .VAddressMagFilterRoundingEnable = true,
1077 .UAddressMinFilterRoundingEnable = true,
1078 .UAddressMagFilterRoundingEnable = true,
1079 .NonnormalizedCoordinateEnable = true,
1080 };
1081
1082 uint32_t offset;
1083 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_SAMPLER_STATE,
1084 GENX(SAMPLER_STATE_length) * 4,
1085 32, &offset);
1086 GENX(SAMPLER_STATE_pack)(NULL, state, &sampler);
1087
1088 #if GEN_GEN >= 7
1089 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_PS), ssp) {
1090 ssp.PointertoPSSamplerState = offset;
1091 }
1092 #else
1093 blorp_emit(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS), ssp) {
1094 ssp.VSSamplerStateChange = true;
1095 ssp.GSSamplerStateChange = true;
1096 ssp.PSSamplerStateChange = true;
1097 ssp.PointertoPSSamplerState = offset;
1098 }
1099 #endif
1100 }
1101
1102 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
1103 static void
1104 blorp_emit_viewport_state(struct blorp_batch *batch,
1105 const struct brw_blorp_params *params)
1106 {
1107 uint32_t cc_vp_offset;
1108
1109 void *state = blorp_alloc_dynamic_state(batch, AUB_TRACE_CC_VP_STATE,
1110 GENX(CC_VIEWPORT_length) * 4, 32,
1111 &cc_vp_offset);
1112
1113 GENX(CC_VIEWPORT_pack)(batch, state,
1114 &(struct GENX(CC_VIEWPORT)) {
1115 .MinimumDepth = 0.0,
1116 .MaximumDepth = 1.0,
1117 });
1118
1119 #if GEN_GEN >= 7
1120 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), vsp) {
1121 vsp.CCViewportPointer = cc_vp_offset;
1122 }
1123 #else
1124 blorp_emit(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS), vsp) {
1125 vsp.CCViewportStateChange = true;
1126 vsp.PointertoCC_VIEWPORT = cc_vp_offset;
1127 }
1128 #endif
1129 }
1130
1131
1132 /**
1133 * \brief Execute a blit or render pass operation.
1134 *
1135 * To execute the operation, this function manually constructs and emits a
1136 * batch to draw a rectangle primitive. The batchbuffer is flushed before
1137 * constructing and after emitting the batch.
1138 *
1139 * This function alters no GL state.
1140 */
1141 static void
1142 blorp_exec(struct blorp_batch *batch,
1143 const struct brw_blorp_params *params)
1144 {
1145 uint32_t blend_state_offset = 0;
1146 uint32_t color_calc_state_offset = 0;
1147 uint32_t depth_stencil_state_offset;
1148
1149 blorp_emit_vertex_buffers(batch, params);
1150 blorp_emit_vertex_elements(batch, params);
1151
1152 emit_urb_config(batch, params);
1153
1154 if (params->wm_prog_data) {
1155 blend_state_offset = blorp_emit_blend_state(batch, params);
1156 color_calc_state_offset = blorp_emit_color_calc_state(batch, params);
1157 }
1158 depth_stencil_state_offset = blorp_emit_depth_stencil_state(batch, params);
1159
1160 #if GEN_GEN <= 6
1161 /* 3DSTATE_CC_STATE_POINTERS
1162 *
1163 * The pointer offsets are relative to
1164 * CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
1165 *
1166 * The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
1167 *
1168 * The dynamic state emit helpers emit their own STATE_POINTERS packets on
1169 * gen7+. However, on gen6 and earlier, they're all lumped together in
1170 * one CC_STATE_POINTERS packet, so we have to emit that here.
1171 */
1172 blorp_emit(batch, GENX(3DSTATE_CC_STATE_POINTERS), cc) {
1173 cc.BLEND_STATEChange = true;
1174 cc.COLOR_CALC_STATEChange = true;
1175 cc.DEPTH_STENCIL_STATEChange = true;
1176 cc.PointertoBLEND_STATE = blend_state_offset;
1177 cc.PointertoCOLOR_CALC_STATE = color_calc_state_offset;
1178 cc.PointertoDEPTH_STENCIL_STATE = depth_stencil_state_offset;
1179 }
1180 #else
1181 (void)blend_state_offset;
1182 (void)color_calc_state_offset;
1183 (void)depth_stencil_state_offset;
1184 #endif
1185
1186 blorp_emit(batch, GENX(3DSTATE_CONSTANT_VS), vs);
1187 #if GEN_GEN >= 7
1188 blorp_emit(batch, GENX(3DSTATE_CONSTANT_HS), hs);
1189 blorp_emit(batch, GENX(3DSTATE_CONSTANT_DS), DS);
1190 #endif
1191 blorp_emit(batch, GENX(3DSTATE_CONSTANT_GS), gs);
1192 blorp_emit(batch, GENX(3DSTATE_CONSTANT_PS), ps);
1193
1194 if (params->wm_prog_data)
1195 blorp_emit_surface_states(batch, params);
1196
1197 if (params->src.addr.buffer)
1198 blorp_emit_sampler_state(batch, params);
1199
1200 blorp_emit_3dstate_multisample(batch, params->dst.surf.samples);
1201
1202 blorp_emit(batch, GENX(3DSTATE_SAMPLE_MASK), mask) {
1203 mask.SampleMask = (1 << params->dst.surf.samples) - 1;
1204 }
1205
1206 /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
1207 * 3DSTATE_VS, Dword 5.0 "VS Function Enable":
1208 *
1209 * [DevSNB] A pipeline flush must be programmed prior to a
1210 * 3DSTATE_VS command that causes the VS Function Enable to
1211 * toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
1212 * command with CS stall bit set and a post sync operation.
1213 *
1214 * We've already done one at the start of the BLORP operation.
1215 */
1216 blorp_emit(batch, GENX(3DSTATE_VS), vs);
1217 #if GEN_GEN >= 7
1218 blorp_emit(batch, GENX(3DSTATE_HS), hs);
1219 blorp_emit(batch, GENX(3DSTATE_TE), te);
1220 blorp_emit(batch, GENX(3DSTATE_DS), DS);
1221 blorp_emit(batch, GENX(3DSTATE_STREAMOUT), so);
1222 #endif
1223 blorp_emit(batch, GENX(3DSTATE_GS), gs);
1224
1225 blorp_emit(batch, GENX(3DSTATE_CLIP), clip) {
1226 clip.PerspectiveDivideDisable = true;
1227 }
1228
1229 blorp_emit_sf_config(batch, params);
1230 blorp_emit_ps_config(batch, params);
1231
1232 blorp_emit_viewport_state(batch, params);
1233
1234 if (params->depth.addr.buffer) {
1235 blorp_emit_depth_stencil_config(batch, params);
1236 } else {
1237 blorp_emit(batch, GENX(3DSTATE_DEPTH_BUFFER), db) {
1238 db.SurfaceType = SURFTYPE_NULL;
1239 db.SurfaceFormat = D32_FLOAT;
1240 }
1241 blorp_emit(batch, GENX(3DSTATE_HIER_DEPTH_BUFFER), hiz);
1242 blorp_emit(batch, GENX(3DSTATE_STENCIL_BUFFER), sb);
1243 }
1244
1245 /* 3DSTATE_CLEAR_PARAMS
1246 *
1247 * From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
1248 * [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
1249 * packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
1250 */
1251 blorp_emit(batch, GENX(3DSTATE_CLEAR_PARAMS), clear) {
1252 clear.DepthClearValueValid = true;
1253 clear.DepthClearValue = params->depth.clear_color.u32[0];
1254 }
1255
1256 blorp_emit(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
1257 rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;
1258 rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;
1259 }
1260
1261 blorp_emit(batch, GENX(3DPRIMITIVE), prim) {
1262 prim.VertexAccessType = SEQUENTIAL;
1263 prim.PrimitiveTopologyType = _3DPRIM_RECTLIST;
1264 prim.VertexCountPerInstance = 3;
1265 prim.InstanceCount = params->num_layers;
1266 }
1267 }
1268
1269 void
1270 genX(blorp_exec)(struct brw_context *brw,
1271 const struct brw_blorp_params *params)
1272 {
1273 #if GEN_GEN == 6
1274 /* Emit workaround flushes when we switch from drawing to blorping. */
1275 brw_emit_post_sync_nonzero_flush(brw);
1276 #endif
1277
1278 brw_upload_state_base_address(brw);
1279
1280 #if GEN_GEN >= 8
1281 gen7_l3_state.emit(brw);
1282 #endif
1283
1284 if (brw->use_resource_streamer)
1285 gen7_disable_hw_binding_tables(brw);
1286
1287 brw_emit_depth_stall_flushes(brw);
1288
1289 struct blorp_batch batch;
1290 blorp_batch_init(&brw->blorp, &batch, brw);
1291 blorp_exec(&batch, params);
1292 blorp_batch_finish(&batch);
1293 }