ilo: clean up Gen7.5 WAs
[mesa.git] / src / gallium / drivers / ilo / ilo_render_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30
31 #include "ilo_blitter.h"
32 #include "ilo_builder_3d.h"
33 #include "ilo_builder_render.h"
34 #include "ilo_shader.h"
35 #include "ilo_state.h"
36 #include "ilo_render_gen.h"
37
38 /**
39 * A wrapper for gen6_PIPE_CONTROL().
40 */
41 static void
42 gen7_pipe_control(struct ilo_render *r, uint32_t dw1)
43 {
44 struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
45 r->workaround_bo : NULL;
46
47 ILO_DEV_ASSERT(r->dev, 7, 7.5);
48
49 if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
50 /* CS stall cannot be set alone */
51 const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
52 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
53 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
54 GEN6_PIPE_CONTROL_DEPTH_STALL |
55 GEN6_PIPE_CONTROL_WRITE__MASK;
56 if (!(dw1 & mask))
57 dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
58 }
59
60 gen6_PIPE_CONTROL(r->builder, dw1, bo, 0, 0);
61
62 r->state.current_pipe_control_dw1 |= dw1;
63 r->state.deferred_pipe_control_dw1 &= ~dw1;
64 }
65
66 static void
67 gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_render *r)
68 {
69 /*
70 * From the Ivy Bridge PRM, volume 2 part 1, page 292:
71 *
72 * "A PIPE_CONTOL command with the CS Stall bit set must be programmed
73 * in the ring after this instruction
74 * (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
75 */
76 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
77
78 ILO_DEV_ASSERT(r->dev, 7, 7);
79
80 r->state.deferred_pipe_control_dw1 |= dw1;
81 }
82
83 static void
84 gen7_wa_pre_vs(struct ilo_render *r)
85 {
86 /*
87 * From the Ivy Bridge PRM, volume 2 part 1, page 106:
88 *
89 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
90 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
91 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
92 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
93 * needs to be sent before any combination of VS associated 3DSTATE."
94 */
95 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
96 GEN6_PIPE_CONTROL_WRITE_IMM;
97
98 ILO_DEV_ASSERT(r->dev, 7, 7);
99
100 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
101 gen7_pipe_control(r, dw1);
102 }
103
104 static void
105 gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_render *r)
106 {
107 /*
108 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
109 *
110 * "Due to an HW issue driver needs to send a pipe control with stall
111 * when ever there is state change in depth bias related state (in
112 * 3DSTATE_SF)"
113 */
114 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
115
116 ILO_DEV_ASSERT(r->dev, 7, 7);
117
118 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
119 gen7_pipe_control(r, dw1);
120 }
121
122 static void
123 gen7_wa_pre_3dstate_multisample(struct ilo_render *r)
124 {
125 /*
126 * From the Ivy Bridge PRM, volume 2 part 1, page 304:
127 *
128 * "Driver must ierarchi that all the caches in the depth pipe are
129 * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
130 * requires driver to send a PIPE_CONTROL with a CS stall along with a
131 * Depth Flush prior to this command.
132 */
133 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
134 GEN6_PIPE_CONTROL_CS_STALL;
135
136 ILO_DEV_ASSERT(r->dev, 7, 7.5);
137
138 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
139 gen7_pipe_control(r, dw1);
140 }
141
142 static void
143 gen7_wa_pre_depth(struct ilo_render *r)
144 {
145 ILO_DEV_ASSERT(r->dev, 7, 7.5);
146
147 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
148 /*
149 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
150 *
151 * "Driver must send a least one PIPE_CONTROL command with CS Stall
152 * and a post sync operation prior to the group of depth
153 * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
154 * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
155 */
156 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
157 GEN6_PIPE_CONTROL_WRITE_IMM;
158
159 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
160 gen7_pipe_control(r, dw1);
161 }
162
163 /*
164 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
165 *
166 * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
167 * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
168 * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
169 * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
170 * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
171 * Depth Flush Bit set, followed by another pipelined depth stall
172 * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
173 * guarantee that the pipeline from WM onwards is already flushed
174 * (e.g., via a preceding MI_FLUSH)."
175 */
176 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
177 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
178 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
179 }
180
181 static void
182 gen7_wa_pre_3dstate_ps_max_threads(struct ilo_render *r)
183 {
184 /*
185 * From the Ivy Bridge PRM, volume 2 part 1, page 286:
186 *
187 * "If this field (Maximum Number of Threads in 3DSTATE_PS) is changed
188 * between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
189 * Pixel Scoreboard set is required to be issued."
190 */
191 const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
192
193 ILO_DEV_ASSERT(r->dev, 7, 7.5);
194
195 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
196 gen7_pipe_control(r, dw1);
197 }
198
199 static void
200 gen7_wa_post_ps_and_later(struct ilo_render *r)
201 {
202 /*
203 * From the Ivy Bridge PRM, volume 2 part 1, page 276:
204 *
205 * "The driver must make sure a PIPE_CONTROL with the Depth Stall
206 * Enable bit set after all the following states are programmed:
207 *
208 * - 3DSTATE_PS
209 * - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
210 * - 3DSTATE_CONSTANT_PS
211 * - 3DSTATE_BINDING_TABLE_POINTERS_PS
212 * - 3DSTATE_SAMPLER_STATE_POINTERS_PS
213 * - 3DSTATE_CC_STATE_POINTERS
214 * - 3DSTATE_BLEND_STATE_POINTERS
215 * - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
216 */
217 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
218
219 ILO_DEV_ASSERT(r->dev, 7, 7);
220
221 r->state.deferred_pipe_control_dw1 |= dw1;
222 }
223
/* test an ILO_DIRTY_x bit in the pipe_dirty mask of the in-scope `session' */
#define DIRTY(name) (session->pipe_dirty & ILO_DIRTY_ ## name)
225
226 void
227 gen7_draw_common_urb(struct ilo_render *r,
228 const struct ilo_state_vector *vec,
229 struct ilo_render_draw_session *session)
230 {
231 /* 3DSTATE_URB_{VS,GS,HS,DS} */
232 if (DIRTY(VE) || DIRTY(VS)) {
233 /* the first 16KB are reserved for VS and PS PCBs */
234 const int offset =
235 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
236 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
237 32768 : 16384;
238 int vs_entry_size, vs_total_size;
239
240 vs_entry_size = (vec->vs) ?
241 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
242
243 /*
244 * From the Ivy Bridge PRM, volume 2 part 1, page 35:
245 *
246 * "Programming Restriction: As the VS URB entry serves as both the
247 * per-vertex input and output of the VS shader, the VS URB
248 * Allocation Size must be sized to the maximum of the vertex input
249 * and output structures."
250 */
251 if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
252 vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
253
254 vs_entry_size *= sizeof(float) * 4;
255 vs_total_size = r->dev->urb_size - offset;
256
257 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
258 gen7_wa_pre_vs(r);
259
260 gen7_3DSTATE_URB_VS(r->builder,
261 offset, vs_total_size, vs_entry_size);
262
263 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
264 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
265 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
266 }
267 }
268
269 void
270 gen7_draw_common_pcb_alloc(struct ilo_render *r,
271 const struct ilo_state_vector *vec,
272 struct ilo_render_draw_session *session)
273 {
274 /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
275 if (r->hw_ctx_changed) {
276 /*
277 * Push constant buffers are only allowed to take up at most the first
278 * 16KB of the URB. Split the space evenly for VS and FS.
279 */
280 const int max_size =
281 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
282 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
283 32768 : 16384;
284 const int size = max_size / 2;
285 int offset = 0;
286
287 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
288 offset += size;
289
290 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
291
292 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
293 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
294 }
295 }
296
297 void
298 gen7_draw_common_pointers_1(struct ilo_render *r,
299 const struct ilo_state_vector *vec,
300 struct ilo_render_draw_session *session)
301 {
302 /* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */
303 if (session->viewport_changed) {
304 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
305 r->state.CC_VIEWPORT);
306
307 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(r->builder,
308 r->state.SF_CLIP_VIEWPORT);
309 }
310 }
311
312 void
313 gen7_draw_common_pointers_2(struct ilo_render *r,
314 const struct ilo_state_vector *vec,
315 struct ilo_render_draw_session *session)
316 {
317 /* 3DSTATE_BLEND_STATE_POINTERS */
318 if (session->blend_changed) {
319 gen7_3DSTATE_BLEND_STATE_POINTERS(r->builder,
320 r->state.BLEND_STATE);
321 }
322
323 /* 3DSTATE_CC_STATE_POINTERS */
324 if (session->cc_changed) {
325 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
326 r->state.COLOR_CALC_STATE);
327 }
328
329 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */
330 if (ilo_dev_gen(r->dev) < ILO_GEN(8) && session->dsa_changed) {
331 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
332 r->state.DEPTH_STENCIL_STATE);
333 }
334 }
335
336 void
337 gen7_draw_vs(struct ilo_render *r,
338 const struct ilo_state_vector *vec,
339 struct ilo_render_draw_session *session)
340 {
341 const bool emit_3dstate_binding_table = session->binding_table_vs_changed;
342 const bool emit_3dstate_sampler_state = session->sampler_vs_changed;
343 /* see gen6_draw_vs() */
344 const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
345 const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
346
347 /* emit depth stall before any of the VS commands */
348 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
349 if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
350 emit_3dstate_constant_vs || emit_3dstate_vs)
351 gen7_wa_pre_vs(r);
352 }
353
354 /* 3DSTATE_BINDING_TABLE_POINTERS_VS */
355 if (emit_3dstate_binding_table) {
356 gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(r->builder,
357 r->state.vs.BINDING_TABLE_STATE);
358 }
359
360 /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */
361 if (emit_3dstate_sampler_state) {
362 gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(r->builder,
363 r->state.vs.SAMPLER_STATE);
364 }
365
366 /* 3DSTATE_CONSTANT_VS */
367 if (emit_3dstate_constant_vs) {
368 gen7_3DSTATE_CONSTANT_VS(r->builder,
369 &r->state.vs.PUSH_CONSTANT_BUFFER,
370 &r->state.vs.PUSH_CONSTANT_BUFFER_size,
371 1);
372 }
373
374 /* 3DSTATE_VS */
375 if (ilo_dev_gen(r->dev) >= ILO_GEN(8)) {
376 if (emit_3dstate_vs || DIRTY(RASTERIZER)) {
377 gen8_3DSTATE_VS(r->builder, vec->vs,
378 vec->rasterizer->state.clip_plane_enable);
379 }
380 } else {
381 if (emit_3dstate_vs)
382 gen6_3DSTATE_VS(r->builder, vec->vs);
383 }
384 }
385
386 void
387 gen7_draw_hs(struct ilo_render *r,
388 const struct ilo_state_vector *vec,
389 struct ilo_render_draw_session *session)
390 {
391 /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
392 if (r->hw_ctx_changed) {
393 gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
394 gen7_disable_3DSTATE_HS(r->builder);
395 }
396
397 /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
398 if (r->hw_ctx_changed)
399 gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(r->builder, 0);
400 }
401
402 void
403 gen7_draw_te(struct ilo_render *r,
404 const struct ilo_state_vector *vec,
405 struct ilo_render_draw_session *session)
406 {
407 /* 3DSTATE_TE */
408 if (r->hw_ctx_changed)
409 gen7_3DSTATE_TE(r->builder);
410 }
411
412 void
413 gen7_draw_ds(struct ilo_render *r,
414 const struct ilo_state_vector *vec,
415 struct ilo_render_draw_session *session)
416 {
417 /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
418 if (r->hw_ctx_changed) {
419 gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
420 gen7_disable_3DSTATE_DS(r->builder);
421 }
422
423 /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
424 if (r->hw_ctx_changed)
425 gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(r->builder, 0);
426
427 }
428
429 void
430 gen7_draw_gs(struct ilo_render *r,
431 const struct ilo_state_vector *vec,
432 struct ilo_render_draw_session *session)
433 {
434 /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
435 if (r->hw_ctx_changed) {
436 gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
437 gen7_disable_3DSTATE_GS(r->builder);
438 }
439
440 /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
441 if (session->binding_table_gs_changed) {
442 gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(r->builder,
443 r->state.gs.BINDING_TABLE_STATE);
444 }
445 }
446
447 void
448 gen7_draw_sol(struct ilo_render *r,
449 const struct ilo_state_vector *vec,
450 struct ilo_render_draw_session *session)
451 {
452 const struct pipe_stream_output_info *so_info;
453 const struct ilo_shader_state *shader;
454 bool dirty_sh = false;
455
456 if (vec->gs) {
457 shader = vec->gs;
458 dirty_sh = DIRTY(GS);
459 }
460 else {
461 shader = vec->vs;
462 dirty_sh = DIRTY(VS);
463 }
464
465 so_info = ilo_shader_get_kernel_so_info(shader);
466
467 /* 3DSTATE_SO_BUFFER */
468 if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
469 vec->so.enabled) {
470 int i;
471
472 for (i = 0; i < vec->so.count; i++) {
473 const int stride = so_info->stride[i] * 4; /* in bytes */
474
475 gen7_3DSTATE_SO_BUFFER(r->builder, i, stride, vec->so.states[i]);
476 }
477
478 for (; i < 4; i++)
479 gen7_disable_3DSTATE_SO_BUFFER(r->builder, i);
480 }
481
482 /* 3DSTATE_SO_DECL_LIST */
483 if (dirty_sh && vec->so.enabled)
484 gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
485
486 /* 3DSTATE_STREAMOUT */
487 if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
488 const int output_count = ilo_shader_get_kernel_param(shader,
489 ILO_KERNEL_OUTPUT_COUNT);
490 int buf_strides[4] = { 0, 0, 0, 0 };
491 int i;
492
493 for (i = 0; i < vec->so.count; i++)
494 buf_strides[i] = so_info->stride[i] * 4;
495
496 gen7_3DSTATE_STREAMOUT(r->builder, 0,
497 vec->rasterizer->state.rasterizer_discard,
498 output_count, buf_strides);
499 }
500 }
501
502 static void
503 gen7_draw_sf(struct ilo_render *r,
504 const struct ilo_state_vector *vec,
505 struct ilo_render_draw_session *session)
506 {
507 /* 3DSTATE_SBE */
508 if (DIRTY(RASTERIZER) || DIRTY(FS)) {
509 gen7_3DSTATE_SBE(r->builder, vec->fs, (vec->rasterizer) ?
510 vec->rasterizer->state.sprite_coord_mode : 0);
511 }
512
513 /* 3DSTATE_SF */
514 if (DIRTY(RASTERIZER) || DIRTY(FB)) {
515 struct pipe_surface *zs = vec->fb.state.zsbuf;
516
517 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
518 gen7_wa_pre_3dstate_sf_depth_bias(r);
519
520 gen7_3DSTATE_SF(r->builder,
521 (vec->rasterizer) ? &vec->rasterizer->sf : NULL,
522 (zs) ? zs->format : PIPE_FORMAT_NONE,
523 vec->fb.num_samples);
524 }
525 }
526
527 static void
528 gen7_draw_wm(struct ilo_render *r,
529 const struct ilo_state_vector *vec,
530 struct ilo_render_draw_session *session)
531 {
532 /* 3DSTATE_WM */
533 if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
534 const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
535 vec->blend->alpha_to_coverage);
536
537 gen7_3DSTATE_WM(r->builder, vec->fs, vec->rasterizer, cc_may_kill);
538 }
539
540 /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
541 if (session->binding_table_fs_changed) {
542 gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(r->builder,
543 r->state.wm.BINDING_TABLE_STATE);
544 }
545
546 /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
547 if (session->sampler_fs_changed) {
548 gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(r->builder,
549 r->state.wm.SAMPLER_STATE);
550 }
551
552 /* 3DSTATE_CONSTANT_PS */
553 if (session->pcb_fs_changed) {
554 gen7_3DSTATE_CONSTANT_PS(r->builder,
555 &r->state.wm.PUSH_CONSTANT_BUFFER,
556 &r->state.wm.PUSH_CONSTANT_BUFFER_size,
557 1);
558 }
559
560 /* 3DSTATE_PS */
561 if (DIRTY(FS) || DIRTY(BLEND) || r->instruction_bo_changed) {
562 const bool dual_blend = vec->blend->dual_blend;
563
564 if (r->hw_ctx_changed)
565 gen7_wa_pre_3dstate_ps_max_threads(r);
566
567 gen7_3DSTATE_PS(r->builder, vec->fs, dual_blend);
568 }
569
570 /* 3DSTATE_SCISSOR_STATE_POINTERS */
571 if (session->scissor_changed) {
572 gen6_3DSTATE_SCISSOR_STATE_POINTERS(r->builder,
573 r->state.SCISSOR_RECT);
574 }
575
576 {
577 const bool emit_3dstate_ps = (DIRTY(FS) || DIRTY(BLEND));
578 const bool emit_3dstate_depth_buffer =
579 (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed);
580
581 if (ilo_dev_gen(r->dev) == ILO_GEN(7)) {
582 /* XXX what is the best way to know if this workaround is needed? */
583 if (emit_3dstate_ps ||
584 session->pcb_fs_changed ||
585 session->viewport_changed ||
586 session->binding_table_fs_changed ||
587 session->sampler_fs_changed ||
588 session->cc_changed ||
589 session->blend_changed ||
590 session->dsa_changed)
591 gen7_wa_post_ps_and_later(r);
592 }
593
594 if (emit_3dstate_depth_buffer)
595 gen7_wa_pre_depth(r);
596 }
597
598 /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
599 if (DIRTY(FB) || r->batch_bo_changed) {
600 const struct ilo_zs_surface *zs;
601 uint32_t clear_params;
602
603 if (vec->fb.state.zsbuf) {
604 const struct ilo_surface_cso *surface =
605 (const struct ilo_surface_cso *) vec->fb.state.zsbuf;
606 const struct ilo_texture_slice *slice =
607 ilo_texture_get_slice(ilo_texture(surface->base.texture),
608 surface->base.u.tex.level, surface->base.u.tex.first_layer);
609
610 assert(!surface->is_rt);
611 zs = &surface->u.zs;
612 clear_params = slice->clear_value;
613 }
614 else {
615 zs = &vec->fb.null_zs;
616 clear_params = 0;
617 }
618
619 gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
620 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
621 gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
622 gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
623 }
624 }
625
626 static void
627 gen7_draw_wm_multisample(struct ilo_render *r,
628 const struct ilo_state_vector *vec,
629 struct ilo_render_draw_session *session)
630 {
631 /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
632 if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
633 const uint32_t *pattern;
634
635 gen7_wa_pre_3dstate_multisample(r);
636
637 pattern = (vec->fb.num_samples > 4) ? r->sample_pattern_8x :
638 (vec->fb.num_samples > 1) ? &r->sample_pattern_4x :
639 &r->sample_pattern_1x;
640
641 gen6_3DSTATE_MULTISAMPLE(r->builder,
642 vec->fb.num_samples, pattern,
643 vec->rasterizer->state.half_pixel_center);
644
645 gen7_3DSTATE_SAMPLE_MASK(r->builder,
646 (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
647 vec->fb.num_samples);
648 }
649 }
650
651 void
652 gen7_draw_vf_draw(struct ilo_render *r,
653 const struct ilo_state_vector *vec,
654 struct ilo_render_draw_session *session)
655 {
656 if (r->state.deferred_pipe_control_dw1)
657 gen7_pipe_control(r, r->state.deferred_pipe_control_dw1);
658
659 /* 3DPRIMITIVE */
660 gen7_3DPRIMITIVE(r->builder, vec->draw, &vec->ib);
661
662 r->state.current_pipe_control_dw1 = 0;
663 r->state.deferred_pipe_control_dw1 = 0;
664 }
665
666 void
667 ilo_render_emit_draw_commands_gen7(struct ilo_render *render,
668 const struct ilo_state_vector *vec,
669 struct ilo_render_draw_session *session)
670 {
671 ILO_DEV_ASSERT(render->dev, 7, 7.5);
672
673 /*
674 * We try to keep the order of the commands match, as closely as possible,
675 * that of the classic i965 driver. It allows us to compare the command
676 * streams easily.
677 */
678 gen6_draw_common_select(render, vec, session);
679 gen6_draw_common_sip(render, vec, session);
680 gen6_draw_vf_statistics(render, vec, session);
681 gen7_draw_common_pcb_alloc(render, vec, session);
682 gen6_draw_common_base_address(render, vec, session);
683 gen7_draw_common_pointers_1(render, vec, session);
684 gen7_draw_common_urb(render, vec, session);
685 gen7_draw_common_pointers_2(render, vec, session);
686 gen7_draw_wm_multisample(render, vec, session);
687 gen7_draw_gs(render, vec, session);
688 gen7_draw_hs(render, vec, session);
689 gen7_draw_te(render, vec, session);
690 gen7_draw_ds(render, vec, session);
691 gen7_draw_vs(render, vec, session);
692 gen7_draw_sol(render, vec, session);
693 gen6_draw_clip(render, vec, session);
694 gen7_draw_sf(render, vec, session);
695 gen7_draw_wm(render, vec, session);
696 gen6_draw_wm_raster(render, vec, session);
697 gen6_draw_sf_rect(render, vec, session);
698 gen6_draw_vf(render, vec, session);
699 gen7_draw_vf_draw(render, vec, session);
700 }
701
702 static void
703 gen7_rectlist_pcb_alloc(struct ilo_render *r,
704 const struct ilo_blitter *blitter)
705 {
706 /*
707 * Push constant buffers are only allowed to take up at most the first
708 * 16KB of the URB. Split the space evenly for VS and FS.
709 */
710 const int max_size =
711 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
712 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
713 32768 : 16384;
714 const int size = max_size / 2;
715 int offset = 0;
716
717 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
718 offset += size;
719
720 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
721
722 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
723 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
724 }
725
726 static void
727 gen7_rectlist_urb(struct ilo_render *r,
728 const struct ilo_blitter *blitter)
729 {
730 /* the first 16KB are reserved for VS and PS PCBs */
731 const int offset =
732 (ilo_dev_gen(r->dev) >= ILO_GEN(8)) ||
733 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ?
734 32768 : 16384;
735
736 gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
737 (blitter->ve.count + blitter->ve.prepend_nosrc_cso) *
738 4 * sizeof(float));
739
740 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
741 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
742 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
743 }
744
745 static void
746 gen7_rectlist_vs_to_sf(struct ilo_render *r,
747 const struct ilo_blitter *blitter)
748 {
749 gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
750 gen6_disable_3DSTATE_VS(r->builder);
751
752 gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
753 gen7_disable_3DSTATE_HS(r->builder);
754
755 gen7_3DSTATE_TE(r->builder);
756
757 gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
758 gen7_disable_3DSTATE_DS(r->builder);
759
760 gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
761 gen7_disable_3DSTATE_GS(r->builder);
762
763 gen7_3DSTATE_STREAMOUT(r->builder, 0, false, 0x0, 0);
764
765 gen6_disable_3DSTATE_CLIP(r->builder);
766
767 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
768 gen7_wa_pre_3dstate_sf_depth_bias(r);
769
770 gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format,
771 blitter->fb.num_samples);
772 gen7_3DSTATE_SBE(r->builder, NULL, 0);
773 }
774
775 static void
776 gen7_rectlist_wm(struct ilo_render *r,
777 const struct ilo_blitter *blitter)
778 {
779 uint32_t hiz_op;
780
781 switch (blitter->op) {
782 case ILO_BLITTER_RECTLIST_CLEAR_ZS:
783 hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
784 break;
785 case ILO_BLITTER_RECTLIST_RESOLVE_Z:
786 hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
787 break;
788 case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
789 hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
790 break;
791 default:
792 hiz_op = 0;
793 break;
794 }
795
796 gen7_hiz_3DSTATE_WM(r->builder, hiz_op);
797
798 gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
799
800 gen7_wa_pre_3dstate_ps_max_threads(r);
801 gen7_disable_3DSTATE_PS(r->builder);
802 }
803
804 static void
805 gen7_rectlist_wm_depth(struct ilo_render *r,
806 const struct ilo_blitter *blitter)
807 {
808 gen7_wa_pre_depth(r);
809
810 if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
811 ILO_BLITTER_USE_FB_STENCIL)) {
812 gen6_3DSTATE_DEPTH_BUFFER(r->builder,
813 &blitter->fb.dst.u.zs, true);
814 }
815
816 if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
817 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
818 &blitter->fb.dst.u.zs);
819 }
820
821 if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
822 gen6_3DSTATE_STENCIL_BUFFER(r->builder,
823 &blitter->fb.dst.u.zs);
824 }
825
826 gen7_3DSTATE_CLEAR_PARAMS(r->builder,
827 blitter->depth_clear_value);
828 }
829
830 static void
831 gen7_rectlist_wm_multisample(struct ilo_render *r,
832 const struct ilo_blitter *blitter)
833 {
834 const uint32_t *pattern =
835 (blitter->fb.num_samples > 4) ? r->sample_pattern_8x :
836 (blitter->fb.num_samples > 1) ? &r->sample_pattern_4x :
837 &r->sample_pattern_1x;
838
839 gen7_wa_pre_3dstate_multisample(r);
840
841 gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
842 pattern, true);
843
844 gen7_3DSTATE_SAMPLE_MASK(r->builder,
845 (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
846 }
847
848 void
849 ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
850 const struct ilo_blitter *blitter,
851 const struct ilo_render_rectlist_session *session)
852 {
853 ILO_DEV_ASSERT(r->dev, 7, 7.5);
854
855 gen7_rectlist_wm_multisample(r, blitter);
856
857 gen6_state_base_address(r->builder, true);
858
859 gen6_user_3DSTATE_VERTEX_BUFFERS(r->builder,
860 session->vb_start, session->vb_end,
861 sizeof(blitter->vertices[0]));
862
863 gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
864
865 gen7_rectlist_pcb_alloc(r, blitter);
866
867 /* needed for any VS-related commands */
868 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
869 gen7_wa_pre_vs(r);
870
871 gen7_rectlist_urb(r, blitter);
872
873 if (blitter->uses & ILO_BLITTER_USE_DSA) {
874 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
875 r->state.DEPTH_STENCIL_STATE);
876 }
877
878 if (blitter->uses & ILO_BLITTER_USE_CC) {
879 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
880 r->state.COLOR_CALC_STATE);
881 }
882
883 gen7_rectlist_vs_to_sf(r, blitter);
884 gen7_rectlist_wm(r, blitter);
885
886 if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
887 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
888 r->state.CC_VIEWPORT);
889 }
890
891 gen7_rectlist_wm_depth(r, blitter);
892
893 gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
894 blitter->fb.width, blitter->fb.height);
895
896 gen7_3DPRIMITIVE(r->builder, &blitter->draw, NULL);
897 }
898
899 int
900 ilo_render_get_draw_commands_len_gen7(const struct ilo_render *render,
901 const struct ilo_state_vector *vec)
902 {
903 static int len;
904
905 ILO_DEV_ASSERT(render->dev, 7, 7.5);
906
907 if (!len) {
908 len += GEN7_3DSTATE_URB_ANY__SIZE * 4;
909 len += GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE * 5;
910 len += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 5;
911 len += GEN7_3DSTATE_POINTERS_ANY__SIZE * (5 + 5 + 4);
912 len += GEN7_3DSTATE_SO_BUFFER__SIZE * 4;
913 len += GEN6_PIPE_CONTROL__SIZE * 5;
914
915 len +=
916 GEN6_STATE_BASE_ADDRESS__SIZE +
917 GEN6_STATE_SIP__SIZE +
918 GEN6_3DSTATE_VF_STATISTICS__SIZE +
919 GEN6_PIPELINE_SELECT__SIZE +
920 GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
921 GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
922 GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
923 GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
924 GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
925 GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
926 GEN6_3DSTATE_INDEX_BUFFER__SIZE +
927 GEN75_3DSTATE_VF__SIZE +
928 GEN6_3DSTATE_VS__SIZE +
929 GEN6_3DSTATE_GS__SIZE +
930 GEN6_3DSTATE_CLIP__SIZE +
931 GEN6_3DSTATE_SF__SIZE +
932 GEN6_3DSTATE_WM__SIZE +
933 GEN6_3DSTATE_SAMPLE_MASK__SIZE +
934 GEN7_3DSTATE_HS__SIZE +
935 GEN7_3DSTATE_TE__SIZE +
936 GEN7_3DSTATE_DS__SIZE +
937 GEN7_3DSTATE_STREAMOUT__SIZE +
938 GEN7_3DSTATE_SBE__SIZE +
939 GEN7_3DSTATE_PS__SIZE +
940 GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
941 GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
942 GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
943 GEN6_3DSTATE_LINE_STIPPLE__SIZE +
944 GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
945 GEN6_3DSTATE_MULTISAMPLE__SIZE +
946 GEN7_3DSTATE_SO_DECL_LIST__SIZE +
947 GEN6_3DPRIMITIVE__SIZE;
948 }
949
950 return len;
951 }