ilo: improve WA handling in rectlist path
[mesa.git] / src / gallium / drivers / ilo / ilo_render_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30
31 #include "ilo_blitter.h"
32 #include "ilo_builder_3d.h"
33 #include "ilo_builder_render.h"
34 #include "ilo_shader.h"
35 #include "ilo_state.h"
36 #include "ilo_render_gen.h"
37
38 /**
39 * A wrapper for gen6_PIPE_CONTROL().
40 */
41 static void
42 gen7_pipe_control(struct ilo_render *r, uint32_t dw1)
43 {
44 struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
45 r->workaround_bo : NULL;
46
47 ILO_DEV_ASSERT(r->dev, 7, 7.5);
48
49 if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
50 /* CS stall cannot be set alone */
51 const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
52 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
53 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
54 GEN6_PIPE_CONTROL_DEPTH_STALL |
55 GEN6_PIPE_CONTROL_WRITE__MASK;
56 if (!(dw1 & mask))
57 dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
58 }
59
60 gen6_PIPE_CONTROL(r->builder, dw1, bo, 0, 0);
61
62 r->state.current_pipe_control_dw1 |= dw1;
63 r->state.deferred_pipe_control_dw1 &= ~dw1;
64 }
65
66 static void
67 gen7_3dprimitive(struct ilo_render *r,
68 const struct pipe_draw_info *info,
69 const struct ilo_ib_state *ib)
70 {
71 ILO_DEV_ASSERT(r->dev, 7, 7.5);
72
73 if (r->state.deferred_pipe_control_dw1)
74 gen7_pipe_control(r, r->state.deferred_pipe_control_dw1);
75
76 /* 3DPRIMITIVE */
77 gen7_3DPRIMITIVE(r->builder, info, ib);
78
79 r->state.current_pipe_control_dw1 = 0;
80 r->state.deferred_pipe_control_dw1 = 0;
81 }
82
83 static void
84 gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_render *r)
85 {
86 /*
87 * From the Ivy Bridge PRM, volume 2 part 1, page 292:
88 *
89 * "A PIPE_CONTOL command with the CS Stall bit set must be programmed
90 * in the ring after this instruction
91 * (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
92 */
93 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
94
95 ILO_DEV_ASSERT(r->dev, 7, 7);
96
97 r->state.deferred_pipe_control_dw1 |= dw1;
98 }
99
100 static void
101 gen7_wa_pre_vs(struct ilo_render *r)
102 {
103 /*
104 * From the Ivy Bridge PRM, volume 2 part 1, page 106:
105 *
106 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
107 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
108 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
109 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
110 * needs to be sent before any combination of VS associated 3DSTATE."
111 */
112 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
113 GEN6_PIPE_CONTROL_WRITE_IMM;
114
115 ILO_DEV_ASSERT(r->dev, 7, 7);
116
117 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
118 gen7_pipe_control(r, dw1);
119 }
120
121 static void
122 gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_render *r)
123 {
124 /*
125 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
126 *
127 * "Due to an HW issue driver needs to send a pipe control with stall
128 * when ever there is state change in depth bias related state (in
129 * 3DSTATE_SF)"
130 */
131 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
132
133 ILO_DEV_ASSERT(r->dev, 7, 7);
134
135 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
136 gen7_pipe_control(r, dw1);
137 }
138
139 static void
140 gen7_wa_pre_3dstate_multisample(struct ilo_render *r)
141 {
142 /*
143 * From the Ivy Bridge PRM, volume 2 part 1, page 304:
144 *
145 * "Driver must ierarchi that all the caches in the depth pipe are
146 * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
147 * requires driver to send a PIPE_CONTROL with a CS stall along with a
148 * Depth Flush prior to this command.
149 */
150 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
151 GEN6_PIPE_CONTROL_CS_STALL;
152
153 ILO_DEV_ASSERT(r->dev, 7, 7.5);
154
155 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
156 gen7_pipe_control(r, dw1);
157 }
158
159 static void
160 gen7_wa_pre_depth(struct ilo_render *r)
161 {
162 ILO_DEV_ASSERT(r->dev, 7, 7.5);
163
164 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
165 /*
166 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
167 *
168 * "Driver must send a least one PIPE_CONTROL command with CS Stall
169 * and a post sync operation prior to the group of depth
170 * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
171 * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
172 */
173 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
174 GEN6_PIPE_CONTROL_WRITE_IMM;
175
176 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
177 gen7_pipe_control(r, dw1);
178 }
179
180 /*
181 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
182 *
183 * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
184 * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
185 * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
186 * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
187 * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
188 * Depth Flush Bit set, followed by another pipelined depth stall
189 * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
190 * guarantee that the pipeline from WM onwards is already flushed
191 * (e.g., via a preceding MI_FLUSH)."
192 */
193 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
194 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
195 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
196 }
197
198 static void
199 gen7_wa_pre_3dstate_ps_max_threads(struct ilo_render *r)
200 {
201 /*
202 * From the Ivy Bridge PRM, volume 2 part 1, page 286:
203 *
204 * "If this field (Maximum Number of Threads in 3DSTATE_PS) is changed
205 * between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
206 * Pixel Scoreboard set is required to be issued."
207 */
208 const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
209
210 ILO_DEV_ASSERT(r->dev, 7, 7.5);
211
212 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
213 gen7_pipe_control(r, dw1);
214 }
215
216 static void
217 gen7_wa_post_ps_and_later(struct ilo_render *r)
218 {
219 /*
220 * From the Ivy Bridge PRM, volume 2 part 1, page 276:
221 *
222 * "The driver must make sure a PIPE_CONTROL with the Depth Stall
223 * Enable bit set after all the following states are programmed:
224 *
225 * - 3DSTATE_PS
226 * - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
227 * - 3DSTATE_CONSTANT_PS
228 * - 3DSTATE_BINDING_TABLE_POINTERS_PS
229 * - 3DSTATE_SAMPLER_STATE_POINTERS_PS
230 * - 3DSTATE_CC_STATE_POINTERS
231 * - 3DSTATE_BLEND_STATE_POINTERS
232 * - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
233 */
234 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
235
236 ILO_DEV_ASSERT(r->dev, 7, 7);
237
238 r->state.deferred_pipe_control_dw1 |= dw1;
239 }
240
/*
 * Test whether ILO_DIRTY_<state> is set in session->pipe_dirty.
 *
 * The expansion refers to a variable named "session", so it can only be used
 * where such a variable is in scope.
 */
#define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
242
243 void
244 gen7_draw_common_urb(struct ilo_render *r,
245 const struct ilo_state_vector *vec,
246 struct ilo_render_draw_session *session)
247 {
248 /* 3DSTATE_URB_{VS,GS,HS,DS} */
249 if (DIRTY(VE) || DIRTY(VS)) {
250 /* the first 16KB are reserved for VS and PS PCBs */
251 const int offset =
252 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
253 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
254 32768 : 16384;
255 int vs_entry_size, vs_total_size;
256
257 vs_entry_size = (vec->vs) ?
258 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
259
260 /*
261 * From the Ivy Bridge PRM, volume 2 part 1, page 35:
262 *
263 * "Programming Restriction: As the VS URB entry serves as both the
264 * per-vertex input and output of the VS shader, the VS URB
265 * Allocation Size must be sized to the maximum of the vertex input
266 * and output structures."
267 */
268 if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
269 vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
270
271 vs_entry_size *= sizeof(float) * 4;
272 vs_total_size = r->dev->urb_size - offset;
273
274 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
275 gen7_wa_pre_vs(r);
276
277 gen7_3DSTATE_URB_VS(r->builder,
278 offset, vs_total_size, vs_entry_size);
279
280 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
281 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
282 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
283 }
284 }
285
286 void
287 gen7_draw_common_pcb_alloc(struct ilo_render *r,
288 const struct ilo_state_vector *vec,
289 struct ilo_render_draw_session *session)
290 {
291 /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
292 if (r->hw_ctx_changed) {
293 /*
294 * Push constant buffers are only allowed to take up at most the first
295 * 16KB of the URB. Split the space evenly for VS and FS.
296 */
297 const int max_size =
298 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
299 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
300 32768 : 16384;
301 const int size = max_size / 2;
302 int offset = 0;
303
304 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
305 offset += size;
306
307 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
308
309 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
310 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
311 }
312 }
313
314 void
315 gen7_draw_common_pointers_1(struct ilo_render *r,
316 const struct ilo_state_vector *vec,
317 struct ilo_render_draw_session *session)
318 {
319 /* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */
320 if (session->viewport_changed) {
321 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
322 r->state.CC_VIEWPORT);
323
324 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(r->builder,
325 r->state.SF_CLIP_VIEWPORT);
326 }
327 }
328
329 void
330 gen7_draw_common_pointers_2(struct ilo_render *r,
331 const struct ilo_state_vector *vec,
332 struct ilo_render_draw_session *session)
333 {
334 /* 3DSTATE_BLEND_STATE_POINTERS */
335 if (session->blend_changed) {
336 gen7_3DSTATE_BLEND_STATE_POINTERS(r->builder,
337 r->state.BLEND_STATE);
338 }
339
340 /* 3DSTATE_CC_STATE_POINTERS */
341 if (session->cc_changed) {
342 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
343 r->state.COLOR_CALC_STATE);
344 }
345
346 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */
347 if (ilo_dev_gen(r->dev) < ILO_GEN(8) && session->dsa_changed) {
348 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
349 r->state.DEPTH_STENCIL_STATE);
350 }
351 }
352
353 void
354 gen7_draw_vs(struct ilo_render *r,
355 const struct ilo_state_vector *vec,
356 struct ilo_render_draw_session *session)
357 {
358 const bool emit_3dstate_binding_table = session->binding_table_vs_changed;
359 const bool emit_3dstate_sampler_state = session->sampler_vs_changed;
360 /* see gen6_draw_vs() */
361 const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
362 const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
363
364 /* emit depth stall before any of the VS commands */
365 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
366 if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
367 emit_3dstate_constant_vs || emit_3dstate_vs)
368 gen7_wa_pre_vs(r);
369 }
370
371 /* 3DSTATE_BINDING_TABLE_POINTERS_VS */
372 if (emit_3dstate_binding_table) {
373 gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(r->builder,
374 r->state.vs.BINDING_TABLE_STATE);
375 }
376
377 /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */
378 if (emit_3dstate_sampler_state) {
379 gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(r->builder,
380 r->state.vs.SAMPLER_STATE);
381 }
382
383 /* 3DSTATE_CONSTANT_VS */
384 if (emit_3dstate_constant_vs) {
385 gen7_3DSTATE_CONSTANT_VS(r->builder,
386 &r->state.vs.PUSH_CONSTANT_BUFFER,
387 &r->state.vs.PUSH_CONSTANT_BUFFER_size,
388 1);
389 }
390
391 /* 3DSTATE_VS */
392 if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
393 if (emit_3dstate_vs || DIRTY(RASTERIZER)) {
394 gen8_3DSTATE_VS(r->builder, vec->vs,
395 vec->rasterizer->state.clip_plane_enable);
396 }
397 } else {
398 if (emit_3dstate_vs)
399 gen6_3DSTATE_VS(r->builder, vec->vs);
400 }
401 }
402
403 void
404 gen7_draw_hs(struct ilo_render *r,
405 const struct ilo_state_vector *vec,
406 struct ilo_render_draw_session *session)
407 {
408 /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
409 if (r->hw_ctx_changed) {
410 gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
411 gen7_disable_3DSTATE_HS(r->builder);
412 }
413
414 /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
415 if (r->hw_ctx_changed)
416 gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(r->builder, 0);
417 }
418
419 void
420 gen7_draw_te(struct ilo_render *r,
421 const struct ilo_state_vector *vec,
422 struct ilo_render_draw_session *session)
423 {
424 /* 3DSTATE_TE */
425 if (r->hw_ctx_changed)
426 gen7_3DSTATE_TE(r->builder);
427 }
428
429 void
430 gen7_draw_ds(struct ilo_render *r,
431 const struct ilo_state_vector *vec,
432 struct ilo_render_draw_session *session)
433 {
434 /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
435 if (r->hw_ctx_changed) {
436 gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
437 gen7_disable_3DSTATE_DS(r->builder);
438 }
439
440 /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
441 if (r->hw_ctx_changed)
442 gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(r->builder, 0);
443
444 }
445
446 void
447 gen7_draw_gs(struct ilo_render *r,
448 const struct ilo_state_vector *vec,
449 struct ilo_render_draw_session *session)
450 {
451 /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
452 if (r->hw_ctx_changed) {
453 gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
454 gen7_disable_3DSTATE_GS(r->builder);
455 }
456
457 /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
458 if (session->binding_table_gs_changed) {
459 gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(r->builder,
460 r->state.gs.BINDING_TABLE_STATE);
461 }
462 }
463
464 void
465 gen7_draw_sol(struct ilo_render *r,
466 const struct ilo_state_vector *vec,
467 struct ilo_render_draw_session *session)
468 {
469 const struct pipe_stream_output_info *so_info;
470 const struct ilo_shader_state *shader;
471 bool dirty_sh = false;
472
473 if (vec->gs) {
474 shader = vec->gs;
475 dirty_sh = DIRTY(GS);
476 }
477 else {
478 shader = vec->vs;
479 dirty_sh = DIRTY(VS);
480 }
481
482 so_info = ilo_shader_get_kernel_so_info(shader);
483
484 /* 3DSTATE_SO_BUFFER */
485 if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
486 vec->so.enabled) {
487 int i;
488
489 for (i = 0; i < vec->so.count; i++) {
490 const int stride = so_info->stride[i] * 4; /* in bytes */
491
492 gen7_3DSTATE_SO_BUFFER(r->builder, i, stride, vec->so.states[i]);
493 }
494
495 for (; i < 4; i++)
496 gen7_disable_3DSTATE_SO_BUFFER(r->builder, i);
497 }
498
499 /* 3DSTATE_SO_DECL_LIST */
500 if (dirty_sh && vec->so.enabled)
501 gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
502
503 /* 3DSTATE_STREAMOUT */
504 if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
505 const int output_count = ilo_shader_get_kernel_param(shader,
506 ILO_KERNEL_OUTPUT_COUNT);
507 int buf_strides[4] = { 0, 0, 0, 0 };
508 int i;
509
510 for (i = 0; i < vec->so.count; i++)
511 buf_strides[i] = so_info->stride[i] * 4;
512
513 gen7_3DSTATE_STREAMOUT(r->builder, 0,
514 vec->rasterizer->state.rasterizer_discard,
515 output_count, buf_strides);
516 }
517 }
518
519 static void
520 gen7_draw_sf(struct ilo_render *r,
521 const struct ilo_state_vector *vec,
522 struct ilo_render_draw_session *session)
523 {
524 /* 3DSTATE_SBE */
525 if (DIRTY(RASTERIZER) || DIRTY(FS)) {
526 gen7_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
527 vec->rasterizer->state.sprite_coord_mode : 0);
528 }
529
530 /* 3DSTATE_SF */
531 if (DIRTY(RASTERIZER) || DIRTY(FB)) {
532 struct pipe_surface *zs = vec->fb.state.zsbuf;
533
534 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
535 gen7_wa_pre_3dstate_sf_depth_bias(r);
536
537 gen7_3DSTATE_SF(r->builder,
538 (vec->rasterizer) ? &vec->rasterizer->sf : NULL,
539 (zs) ? zs->format : PIPE_FORMAT_NONE,
540 vec->fb.num_samples);
541 }
542 }
543
544 static void
545 gen7_draw_wm(struct ilo_render *r,
546 const struct ilo_state_vector *vec,
547 struct ilo_render_draw_session *session)
548 {
549 /* 3DSTATE_WM */
550 if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
551 const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
552 vec->blend->alpha_to_coverage);
553
554 gen7_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, cc_may_kill);
555 }
556
557 /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
558 if (session->binding_table_fs_changed) {
559 gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(r->builder,
560 r->state.wm.BINDING_TABLE_STATE);
561 }
562
563 /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
564 if (session->sampler_fs_changed) {
565 gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(r->builder,
566 r->state.wm.SAMPLER_STATE);
567 }
568
569 /* 3DSTATE_CONSTANT_PS */
570 if (session->pcb_fs_changed) {
571 gen7_3DSTATE_CONSTANT_PS(r->builder,
572 &r->state.wm.PUSH_CONSTANT_BUFFER,
573 &r->state.wm.PUSH_CONSTANT_BUFFER_size,
574 1);
575 }
576
577 /* 3DSTATE_PS */
578 if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
579 const bool dual_blend = vec->blend->dual_blend;
580
581 if (r->hw_ctx_changed)
582 gen7_wa_pre_3dstate_ps_max_threads(r);
583
584 gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
585 }
586
587 /* 3DSTATE_SCISSOR_STATE_POINTERS */
588 if (session->scissor_changed) {
589 gen6_3DSTATE_SCISSOR_STATE_POINTERS(r->builder,
590 r->state.SCISSOR_RECT);
591 }
592
593 {
594 const bool emit_3dstate_ps = (DIRTY(FS) || DIRTY(BLEND));
595 const bool emit_3dstate_depth_buffer =
596 (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed);
597
598 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
599 /* XXX what is the best way to know if this workaround is needed? */
600 if (emit_3dstate_ps ||
601 session->pcb_fs_changed ||
602 session->viewport_changed ||
603 session->binding_table_fs_changed ||
604 session->sampler_fs_changed ||
605 session->cc_changed ||
606 session->blend_changed ||
607 session->dsa_changed)
608 gen7_wa_post_ps_and_later(r);
609 }
610
611 if (emit_3dstate_depth_buffer)
612 gen7_wa_pre_depth(r);
613 }
614
615 /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
616 if (DIRTY(FB) || r->batch_bo_changed) {
617 const struct ilo_zs_surface *zs;
618 uint32_t clear_params;
619
620 if (vec->fb.state.zsbuf) {
621 const struct ilo_surface_cso *surface =
622 (const struct ilo_surface_cso *) vec->fb.state.zsbuf;
623 const struct ilo_texture_slice *slice =
624 ilo_texture_get_slice(ilo_texture(surface->base.texture),
625 surface->base.u.tex.level, surface->base.u.tex.first_layer);
626
627 assert(!surface->is_rt);
628 zs = &surface->u.zs;
629 clear_params = slice->clear_value;
630 }
631 else {
632 zs = &vec->fb.null_zs;
633 clear_params = 0;
634 }
635
636 gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
637 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
638 gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
639 gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
640 }
641 }
642
643 static void
644 gen7_draw_wm_multisample(struct ilo_render *r,
645 const struct ilo_state_vector *vec,
646 struct ilo_render_draw_session *session)
647 {
648 /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
649 if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
650 const uint32_t *pattern;
651
652 gen7_wa_pre_3dstate_multisample(r);
653
654 pattern = (vec->fb.num_samples > 4) ? r->sample_pattern_8x :
655 (vec->fb.num_samples > 1) ? &r->sample_pattern_4x :
656 &r->sample_pattern_1x;
657
658 gen6_3DSTATE_MULTISAMPLE(r->builder,
659 vec->fb.num_samples, pattern,
660 vec->rasterizer->state.half_pixel_center);
661
662 gen7_3DSTATE_SAMPLE_MASK(r->builder,
663 (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
664 vec->fb.num_samples);
665 }
666 }
667
/**
 * Emit the commands for a draw on Gen7 and Gen7.5 (asserted below).
 *
 * Each helper emits its commands as its own conditions dictate (see the
 * individual helpers); 3DPRIMITIVE is emitted last.
 */
void
ilo_render_emit_draw_commands_gen7(struct ilo_render *render,
                                   const struct ilo_state_vector *vec,
                                   struct ilo_render_draw_session *session)
{
   ILO_DEV_ASSERT(render->dev, 7, 7.5);

   /*
    * We try to keep the order of the commands match, as closely as possible,
    * that of the classic i965 driver.  It allows us to compare the command
    * streams easily.
    */
   gen6_draw_common_select(render, vec, session);
   gen6_draw_common_sip(render, vec, session);
   gen6_draw_vf_statistics(render, vec, session);
   gen7_draw_common_pcb_alloc(render, vec, session);
   gen6_draw_common_base_address(render, vec, session);
   gen7_draw_common_pointers_1(render, vec, session);
   gen7_draw_common_urb(render, vec, session);
   gen7_draw_common_pointers_2(render, vec, session);
   gen7_draw_wm_multisample(render, vec, session);
   gen7_draw_gs(render, vec, session);
   gen7_draw_hs(render, vec, session);
   gen7_draw_te(render, vec, session);
   gen7_draw_ds(render, vec, session);
   gen7_draw_vs(render, vec, session);
   gen7_draw_sol(render, vec, session);
   gen6_draw_clip(render, vec, session);
   gen7_draw_sf(render, vec, session);
   gen7_draw_wm(render, vec, session);
   gen6_draw_wm_raster(render, vec, session);
   gen6_draw_sf_rect(render, vec, session);
   gen6_draw_vf(render, vec, session);

   /* emits any still-deferred PIPE_CONTROL flags before 3DPRIMITIVE */
   gen7_3dprimitive(render, vec->draw, &vec->ib);
}
704
705 static void
706 gen7_rectlist_pcb_alloc(struct ilo_render *r,
707 const struct ilo_blitter *blitter)
708 {
709 /*
710 * Push constant buffers are only allowed to take up at most the first
711 * 16KB of the URB. Split the space evenly for VS and FS.
712 */
713 const int max_size =
714 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
715 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
716 32768 : 16384;
717 const int size = max_size / 2;
718 int offset = 0;
719
720 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
721 offset += size;
722
723 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
724
725 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
726 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
727 }
728
729 static void
730 gen7_rectlist_urb(struct ilo_render *r,
731 const struct ilo_blitter *blitter)
732 {
733 /* the first 16KB are reserved for VS and PS PCBs */
734 const int offset =
735 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
736 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
737 32768 : 16384;
738
739 gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
740 (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
741 4 * sizeof(float));
742
743 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
744 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
745 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
746 }
747
748 static void
749 gen7_rectlist_vs_to_sf(struct ilo_render *r,
750 const struct ilo_blitter *blitter)
751 {
752 gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
753 gen6_disable_3DSTATE_VS(r->builder);
754
755 gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
756 gen7_disable_3DSTATE_HS(r->builder);
757
758 gen7_3DSTATE_TE(r->builder);
759
760 gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
761 gen7_disable_3DSTATE_DS(r->builder);
762
763 gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
764 gen7_disable_3DSTATE_GS(r->builder);
765
766 gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
767
768 gen6_disable_3DSTATE_CLIP(r->builder);
769
770 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
771 gen7_wa_pre_3dstate_sf_depth_bias(r);
772
773 gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format,
774 blitter->fb.num_samples);
775 gen7_3DSTATE_SBE(r->builder, NULL, 0);
776 }
777
778 static void
779 gen7_rectlist_wm(struct ilo_render *r,
780 const struct ilo_blitter *blitter)
781 {
782 uint32_t hiz_op;
783
784 switch (blitter->op) {
785 case ILO_BLITTER_RECTLIST_CLEAR_ZS:
786 hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
787 break;
788 case ILO_BLITTER_RECTLIST_RESOLVE_Z:
789 hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
790 break;
791 case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
792 hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
793 break;
794 default:
795 hiz_op = 0;
796 break;
797 }
798
799 gen7_hiz_3DSTATE_WM(r->builder, hiz_op);
800
801 gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
802
803 gen7_wa_pre_3dstate_ps_max_threads(r);
804 gen7_disable_3DSTATE_PS(r->builder);
805 }
806
807 static void
808 gen7_rectlist_wm_depth(struct ilo_render *r,
809 const struct ilo_blitter *blitter)
810 {
811 gen7_wa_pre_depth(r);
812
813 if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
814 ILO_BLITTER_USE_FB_STENCIL)) {
815 gen6_3DSTATE_DEPTH_BUFFER(r->builder,
816 &blitter->fb.dst.u.zs, true);
817 }
818
819 if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
820 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
821 &blitter->fb.dst.u.zs);
822 }
823
824 if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
825 gen6_3DSTATE_STENCIL_BUFFER(r->builder,
826 &blitter->fb.dst.u.zs);
827 }
828
829 gen7_3DSTATE_CLEAR_PARAMS(r->builder,
830 blitter->depth_clear_value);
831 }
832
833 static void
834 gen7_rectlist_wm_multisample(struct ilo_render *r,
835 const struct ilo_blitter *blitter)
836 {
837 const uint32_t *pattern =
838 (blitter->fb.num_samples > 4) ? r->sample_pattern_8x :
839 (blitter->fb.num_samples > 1) ? &r->sample_pattern_4x :
840 &r->sample_pattern_1x;
841
842 gen7_wa_pre_3dstate_multisample(r);
843
844 gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
845 pattern, true);
846
847 gen7_3DSTATE_SAMPLE_MASK(r->builder,
848 (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
849 }
850
851 void
852 ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
853 const struct ilo_blitter *blitter,
854 const struct ilo_render_rectlist_session *session)
855 {
856 ILO_DEV_ASSERT(r->dev, 7, 7.5);
857
858 gen7_rectlist_wm_multisample(r, blitter);
859
860 gen6_state_base_address(r->builder, true);
861
862 gen6_user_3DSTATE_VERTEX_BUFFERS(r->builder,
863 session->vb_start, session->vb_end,
864 sizeof(blitter->vertices[0]));
865
866 gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
867
868 gen7_rectlist_pcb_alloc(r, blitter);
869
870 /* needed for any VS-related commands */
871 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
872 gen7_wa_pre_vs(r);
873
874 gen7_rectlist_urb(r, blitter);
875
876 if (blitter->uses & ILO_BLITTER_USE_DSA) {
877 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
878 r->state.DEPTH_STENCIL_STATE);
879 }
880
881 if (blitter->uses & ILO_BLITTER_USE_CC) {
882 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
883 r->state.COLOR_CALC_STATE);
884 }
885
886 gen7_rectlist_vs_to_sf(r, blitter);
887 gen7_rectlist_wm(r, blitter);
888
889 if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
890 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
891 r->state.CC_VIEWPORT);
892 }
893
894 gen7_rectlist_wm_depth(r, blitter);
895
896 gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
897 blitter->fb.width, blitter->fb.height);
898
899 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
900 gen7_wa_post_ps_and_later(r);
901
902 gen7_3dprimitive(r, &blitter->draw, NULL);
903 }
904
905 int
906 ilo_render_get_draw_commands_len_gen7(const struct ilo_render *render,
907 const struct ilo_state_vector *vec)
908 {
909 static int len;
910
911 ILO_DEV_ASSERT(render->dev, 7, 7.5);
912
913 if (!len) {
914 len += GEN7_3DSTATE_URB_ANY__SIZE * 4;
915 len += GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE * 5;
916 len += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 5;
917 len += GEN7_3DSTATE_POINTERS_ANY__SIZE * (5 + 5 + 4);
918 len += GEN7_3DSTATE_SO_BUFFER__SIZE * 4;
919 len += GEN6_PIPE_CONTROL__SIZE * 5;
920
921 len +=
922 GEN6_STATE_BASE_ADDRESS__SIZE +
923 GEN6_STATE_SIP__SIZE +
924 GEN6_3DSTATE_VF_STATISTICS__SIZE +
925 GEN6_PIPELINE_SELECT__SIZE +
926 GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
927 GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
928 GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
929 GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
930 GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
931 GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
932 GEN6_3DSTATE_INDEX_BUFFER__SIZE +
933 GEN75_3DSTATE_VF__SIZE +
934 GEN6_3DSTATE_VS__SIZE +
935 GEN6_3DSTATE_GS__SIZE +
936 GEN6_3DSTATE_CLIP__SIZE +
937 GEN6_3DSTATE_SF__SIZE +
938 GEN6_3DSTATE_WM__SIZE +
939 GEN6_3DSTATE_SAMPLE_MASK__SIZE +
940 GEN7_3DSTATE_HS__SIZE +
941 GEN7_3DSTATE_TE__SIZE +
942 GEN7_3DSTATE_DS__SIZE +
943 GEN7_3DSTATE_STREAMOUT__SIZE +
944 GEN7_3DSTATE_SBE__SIZE +
945 GEN7_3DSTATE_PS__SIZE +
946 GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
947 GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
948 GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
949 GEN6_3DSTATE_LINE_STIPPLE__SIZE +
950 GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
951 GEN6_3DSTATE_MULTISAMPLE__SIZE +
952 GEN7_3DSTATE_SO_DECL_LIST__SIZE +
953 GEN6_3DPRIMITIVE__SIZE;
954 }
955
956 return len;
957 }