723288266be8b060d0ad157fe217c8060d393f54
[mesa.git] / src / gallium / drivers / ilo / ilo_render_gen6.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30 #include "util/u_prim.h"
31
32 #include "ilo_blitter.h"
33 #include "ilo_builder_3d.h"
34 #include "ilo_builder_mi.h"
35 #include "ilo_builder_render.h"
36 #include "ilo_query.h"
37 #include "ilo_shader.h"
38 #include "ilo_state.h"
39 #include "ilo_render_gen.h"
40
41 /**
42 * A wrapper for gen6_PIPE_CONTROL().
43 */
44 static void
45 gen6_pipe_control(struct ilo_render *r, uint32_t dw1)
46 {
47 struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
48 r->workaround_bo : NULL;
49
50 ILO_DEV_ASSERT(r->dev, 6, 6);
51
52 gen6_PIPE_CONTROL(r->builder, dw1, bo, 0, 0);
53
54 r->state.current_pipe_control_dw1 |= dw1;
55
56 assert(!r->state.deferred_pipe_control_dw1);
57 }
58
59 static void
60 gen6_3dprimitive(struct ilo_render *r,
61 const struct pipe_draw_info *info,
62 const struct ilo_ib_state *ib)
63 {
64 ILO_DEV_ASSERT(r->dev, 6, 6);
65
66 /* 3DPRIMITIVE */
67 gen6_3DPRIMITIVE(r->builder, info, ib);
68
69 r->state.current_pipe_control_dw1 = 0;
70 assert(!r->state.deferred_pipe_control_dw1);
71 }
72
73 /**
74 * This should be called before PIPE_CONTROL.
75 */
76 void
77 gen6_wa_pre_pipe_control(struct ilo_render *r, uint32_t dw1)
78 {
79 /*
80 * From the Sandy Bridge PRM, volume 2 part 1, page 60:
81 *
82 * "Pipe-control with CS-stall bit set must be sent BEFORE the
83 * pipe-control with a post-sync op and no write-cache flushes."
84 *
85 * This WA may also be triggered indirectly by the other two WAs on the
86 * same page:
87 *
88 * "Before any depth stall flush (including those produced by
89 * non-pipelined state commands), software needs to first send a
90 * PIPE_CONTROL with no bits set except Post-Sync Operation != 0."
91 *
92 * "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a
93 * PIPE_CONTROL with any non-zero post-sync-op is required."
94 */
95 const bool direct_wa_cond = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) &&
96 !(dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
97 const bool indirect_wa_cond = (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) |
98 (dw1 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH);
99
100 ILO_DEV_ASSERT(r->dev, 6, 6);
101
102 if (!direct_wa_cond && !indirect_wa_cond)
103 return;
104
105 if (!(r->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_CS_STALL)) {
106 /*
107 * From the Sandy Bridge PRM, volume 2 part 1, page 73:
108 *
109 * "1 of the following must also be set (when CS stall is set):
110 *
111 * - Depth Cache Flush Enable ([0] of DW1)
112 * - Stall at Pixel Scoreboard ([1] of DW1)
113 * - Depth Stall ([13] of DW1)
114 * - Post-Sync Operation ([13] of DW1)
115 * - Render Target Cache Flush Enable ([12] of DW1)
116 * - Notify Enable ([8] of DW1)"
117 *
118 * Because of the WAs above, we have to pick Stall at Pixel Scoreboard.
119 */
120 const uint32_t direct_wa = GEN6_PIPE_CONTROL_CS_STALL |
121 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
122
123 gen6_pipe_control(r, direct_wa);
124 }
125
126 if (indirect_wa_cond &&
127 !(r->state.current_pipe_control_dw1 & GEN6_PIPE_CONTROL_WRITE__MASK)) {
128 const uint32_t indirect_wa = GEN6_PIPE_CONTROL_WRITE_IMM;
129
130 gen6_pipe_control(r, indirect_wa);
131 }
132 }
133
134 /**
135 * This should be called before any non-pipelined state command.
136 */
137 static void
138 gen6_wa_pre_non_pipelined(struct ilo_render *r)
139 {
140 ILO_DEV_ASSERT(r->dev, 6, 6);
141
142 /* non-pipelined state commands produce depth stall */
143 gen6_wa_pre_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
144 }
145
146 static void
147 gen6_wa_post_3dstate_constant_vs(struct ilo_render *r)
148 {
149 /*
150 * According to upload_vs_state() of the classic driver, we need to emit a
151 * PIPE_CONTROL after 3DSTATE_CONSTANT_VS, otherwise the command is kept
152 * being buffered by VS FF, to the point that the FF dies.
153 */
154 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
155 GEN6_PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE |
156 GEN6_PIPE_CONTROL_STATE_CACHE_INVALIDATE;
157
158 gen6_wa_pre_pipe_control(r, dw1);
159
160 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
161 gen6_pipe_control(r, dw1);
162 }
163
164 static void
165 gen6_wa_pre_3dstate_wm_max_threads(struct ilo_render *r)
166 {
167 /*
168 * From the Sandy Bridge PRM, volume 2 part 1, page 274:
169 *
170 * "A PIPE_CONTROL command, with only the Stall At Pixel Scoreboard
171 * field set (DW1 Bit 1), must be issued prior to any change to the
172 * value in this field (Maximum Number of Threads in 3DSTATE_WM)"
173 */
174 const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
175
176 ILO_DEV_ASSERT(r->dev, 6, 6);
177
178 gen6_wa_pre_pipe_control(r, dw1);
179
180 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
181 gen6_pipe_control(r, dw1);
182 }
183
184 static void
185 gen6_wa_pre_3dstate_multisample(struct ilo_render *r)
186 {
187 /*
188 * From the Sandy Bridge PRM, volume 2 part 1, page 305:
189 *
190 * "Driver must guarentee that all the caches in the depth pipe are
191 * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
192 * requires driver to send a PIPE_CONTROL with a CS stall along with a
193 * Depth Flush prior to this command."
194 */
195 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
196 GEN6_PIPE_CONTROL_CS_STALL;
197
198 ILO_DEV_ASSERT(r->dev, 6, 6);
199
200 gen6_wa_pre_pipe_control(r, dw1);
201
202 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
203 gen6_pipe_control(r, dw1);
204 }
205
206 static void
207 gen6_wa_pre_depth(struct ilo_render *r)
208 {
209 ILO_DEV_ASSERT(r->dev, 6, 6);
210
211 /*
212 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
213 *
214 * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
215 * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
216 * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
217 * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
218 * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
219 * Depth Flush Bit set, followed by another pipelined depth stall
220 * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
221 * guarantee that the pipeline from WM onwards is already flushed
222 * (e.g., via a preceding MI_FLUSH)."
223 *
224 * According to the classic driver, it also applies for GEN6.
225 */
226 gen6_wa_pre_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL |
227 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
228
229 gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
230 gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
231 gen6_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
232 }
233
234 #define DIRTY(state) (session->pipe_dirty & ILO_DIRTY_ ## state)
235
236 void
237 gen6_draw_common_select(struct ilo_render *r,
238 const struct ilo_state_vector *vec,
239 struct ilo_render_draw_session *session)
240 {
241 /* PIPELINE_SELECT */
242 if (r->hw_ctx_changed) {
243 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
244 gen6_wa_pre_non_pipelined(r);
245
246 gen6_PIPELINE_SELECT(r->builder, 0x0);
247 }
248 }
249
250 void
251 gen6_draw_common_sip(struct ilo_render *r,
252 const struct ilo_state_vector *vec,
253 struct ilo_render_draw_session *session)
254 {
255 /* STATE_SIP */
256 if (r->hw_ctx_changed) {
257 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
258 gen6_wa_pre_non_pipelined(r);
259
260 gen6_STATE_SIP(r->builder, 0);
261 }
262 }
263
264 void
265 gen6_draw_common_base_address(struct ilo_render *r,
266 const struct ilo_state_vector *vec,
267 struct ilo_render_draw_session *session)
268 {
269 /* STATE_BASE_ADDRESS */
270 if (r->state_bo_changed || r->instruction_bo_changed ||
271 r->batch_bo_changed) {
272 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
273 gen6_wa_pre_non_pipelined(r);
274
275 if (ilo_dev_gen(r->dev) >= ILO_GEN(8))
276 gen8_state_base_address(r->builder, r->hw_ctx_changed);
277 else
278 gen6_state_base_address(r->builder, r->hw_ctx_changed);
279
280 /*
281 * From the Sandy Bridge PRM, volume 1 part 1, page 28:
282 *
283 * "The following commands must be reissued following any change to
284 * the base addresses:
285 *
286 * * 3DSTATE_BINDING_TABLE_POINTERS
287 * * 3DSTATE_SAMPLER_STATE_POINTERS
288 * * 3DSTATE_VIEWPORT_STATE_POINTERS
289 * * 3DSTATE_CC_POINTERS
290 * * MEDIA_STATE_POINTERS"
291 *
292 * 3DSTATE_SCISSOR_STATE_POINTERS is not on the list, but it is
293 * reasonable to also reissue the command. Same to PCB.
294 */
295 session->viewport_changed = true;
296
297 session->scissor_changed = true;
298
299 session->blend_changed = true;
300 session->dsa_changed = true;
301 session->cc_changed = true;
302
303 session->sampler_vs_changed = true;
304 session->sampler_gs_changed = true;
305 session->sampler_fs_changed = true;
306
307 session->pcb_vs_changed = true;
308 session->pcb_gs_changed = true;
309 session->pcb_fs_changed = true;
310
311 session->binding_table_vs_changed = true;
312 session->binding_table_gs_changed = true;
313 session->binding_table_fs_changed = true;
314 }
315 }
316
317 static void
318 gen6_draw_common_urb(struct ilo_render *r,
319 const struct ilo_state_vector *vec,
320 struct ilo_render_draw_session *session)
321 {
322 /* 3DSTATE_URB */
323 if (DIRTY(VE) || DIRTY(VS) || DIRTY(GS)) {
324 const bool gs_active = (vec->gs || (vec->vs &&
325 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)));
326 int vs_entry_size, gs_entry_size;
327 int vs_total_size, gs_total_size;
328
329 vs_entry_size = (vec->vs) ?
330 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
331
332 /*
333 * As indicated by 2e712e41db0c0676e9f30fc73172c0e8de8d84d4, VF and VS
334 * share VUE handles. The VUE allocation size must be large enough to
335 * store either VF outputs (number of VERTEX_ELEMENTs) and VS outputs.
336 *
337 * I am not sure if the PRM explicitly states that VF and VS share VUE
338 * handles. But here is a citation that implies so:
339 *
340 * From the Sandy Bridge PRM, volume 2 part 1, page 44:
341 *
342 * "Once a FF stage that spawn threads has sufficient input to
343 * initiate a thread, it must guarantee that it is safe to request
344 * the thread initiation. For all these FF stages, this check is
345 * based on :
346 *
347 * - The availability of output URB entries:
348 * - VS: As the input URB entries are overwritten with the
349 * VS-generated output data, output URB availability isn't a
350 * factor."
351 */
352 if (vs_entry_size < vec->ve->count + vec->ve->prepend_nosrc_cso)
353 vs_entry_size = vec->ve->count + vec->ve->prepend_nosrc_cso;
354
355 gs_entry_size = (vec->gs) ?
356 ilo_shader_get_kernel_param(vec->gs, ILO_KERNEL_OUTPUT_COUNT) :
357 (gs_active) ? vs_entry_size : 0;
358
359 /* in bytes */
360 vs_entry_size *= sizeof(float) * 4;
361 gs_entry_size *= sizeof(float) * 4;
362 vs_total_size = r->dev->urb_size;
363
364 if (gs_active) {
365 vs_total_size /= 2;
366 gs_total_size = vs_total_size;
367 }
368 else {
369 gs_total_size = 0;
370 }
371
372 gen6_3DSTATE_URB(r->builder, vs_total_size, gs_total_size,
373 vs_entry_size, gs_entry_size);
374
375 /*
376 * From the Sandy Bridge PRM, volume 2 part 1, page 27:
377 *
378 * "Because of a urb corruption caused by allocating a previous
379 * gsunit's urb entry to vsunit software is required to send a
380 * "GS NULL Fence" (Send URB fence with VS URB size == 1 and GS URB
381 * size == 0) plus a dummy DRAW call before any case where VS will
382 * be taking over GS URB space."
383 */
384 if (r->state.gs.active && !gs_active)
385 ilo_render_emit_flush(r);
386
387 r->state.gs.active = gs_active;
388 }
389 }
390
391 static void
392 gen6_draw_common_pointers_1(struct ilo_render *r,
393 const struct ilo_state_vector *vec,
394 struct ilo_render_draw_session *session)
395 {
396 /* 3DSTATE_VIEWPORT_STATE_POINTERS */
397 if (session->viewport_changed) {
398 gen6_3DSTATE_VIEWPORT_STATE_POINTERS(r->builder,
399 r->state.CLIP_VIEWPORT,
400 r->state.SF_VIEWPORT,
401 r->state.CC_VIEWPORT);
402 }
403 }
404
405 static void
406 gen6_draw_common_pointers_2(struct ilo_render *r,
407 const struct ilo_state_vector *vec,
408 struct ilo_render_draw_session *session)
409 {
410 /* 3DSTATE_CC_STATE_POINTERS */
411 if (session->blend_changed ||
412 session->dsa_changed ||
413 session->cc_changed) {
414 gen6_3DSTATE_CC_STATE_POINTERS(r->builder,
415 r->state.BLEND_STATE,
416 r->state.DEPTH_STENCIL_STATE,
417 r->state.COLOR_CALC_STATE);
418 }
419
420 /* 3DSTATE_SAMPLER_STATE_POINTERS */
421 if (session->sampler_vs_changed ||
422 session->sampler_gs_changed ||
423 session->sampler_fs_changed) {
424 gen6_3DSTATE_SAMPLER_STATE_POINTERS(r->builder,
425 r->state.vs.SAMPLER_STATE,
426 0,
427 r->state.wm.SAMPLER_STATE);
428 }
429 }
430
431 static void
432 gen6_draw_common_pointers_3(struct ilo_render *r,
433 const struct ilo_state_vector *vec,
434 struct ilo_render_draw_session *session)
435 {
436 /* 3DSTATE_SCISSOR_STATE_POINTERS */
437 if (session->scissor_changed) {
438 gen6_3DSTATE_SCISSOR_STATE_POINTERS(r->builder,
439 r->state.SCISSOR_RECT);
440 }
441
442 /* 3DSTATE_BINDING_TABLE_POINTERS */
443 if (session->binding_table_vs_changed ||
444 session->binding_table_gs_changed ||
445 session->binding_table_fs_changed) {
446 gen6_3DSTATE_BINDING_TABLE_POINTERS(r->builder,
447 r->state.vs.BINDING_TABLE_STATE,
448 r->state.gs.BINDING_TABLE_STATE,
449 r->state.wm.BINDING_TABLE_STATE);
450 }
451 }
452
453 void
454 gen6_draw_vf(struct ilo_render *r,
455 const struct ilo_state_vector *vec,
456 struct ilo_render_draw_session *session)
457 {
458 if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
459 /* 3DSTATE_INDEX_BUFFER */
460 if (DIRTY(IB) || r->batch_bo_changed) {
461 gen6_3DSTATE_INDEX_BUFFER(r->builder,
462 &vec->ib, false);
463 }
464
465 /* 3DSTATE_VF */
466 if (session->primitive_restart_changed) {
467 gen75_3DSTATE_VF(r->builder, vec->draw->primitive_restart,
468 vec->draw->restart_index);
469 }
470 }
471 else {
472 /* 3DSTATE_INDEX_BUFFER */
473 if (DIRTY(IB) || session->primitive_restart_changed ||
474 r->batch_bo_changed) {
475 gen6_3DSTATE_INDEX_BUFFER(r->builder,
476 &vec->ib, vec->draw->primitive_restart);
477 }
478 }
479
480 /* 3DSTATE_VERTEX_BUFFERS */
481 if (DIRTY(VB) || DIRTY(VE) || r->batch_bo_changed)
482 gen6_3DSTATE_VERTEX_BUFFERS(r->builder, vec->ve, &vec->vb);
483
484 /* 3DSTATE_VERTEX_ELEMENTS */
485 if (DIRTY(VE))
486 gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, vec->ve);
487 }
488
489 void
490 gen6_draw_vf_statistics(struct ilo_render *r,
491 const struct ilo_state_vector *vec,
492 struct ilo_render_draw_session *session)
493 {
494 /* 3DSTATE_VF_STATISTICS */
495 if (r->hw_ctx_changed)
496 gen6_3DSTATE_VF_STATISTICS(r->builder, false);
497 }
498
499 void
500 gen6_draw_vs(struct ilo_render *r,
501 const struct ilo_state_vector *vec,
502 struct ilo_render_draw_session *session)
503 {
504 const bool emit_3dstate_vs = (DIRTY(VS) || r->instruction_bo_changed);
505 const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
506
507 /*
508 * the classic i965 does this in upload_vs_state(), citing a spec that I
509 * cannot find
510 */
511 if (emit_3dstate_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
512 gen6_wa_pre_non_pipelined(r);
513
514 /* 3DSTATE_CONSTANT_VS */
515 if (emit_3dstate_constant_vs) {
516 gen6_3DSTATE_CONSTANT_VS(r->builder,
517 &r->state.vs.PUSH_CONSTANT_BUFFER,
518 &r->state.vs.PUSH_CONSTANT_BUFFER_size,
519 1);
520 }
521
522 /* 3DSTATE_VS */
523 if (emit_3dstate_vs)
524 gen6_3DSTATE_VS(r->builder, vec->vs);
525
526 if (emit_3dstate_constant_vs && ilo_dev_gen(r->dev) == ILO_GEN(6))
527 gen6_wa_post_3dstate_constant_vs(r);
528 }
529
530 static void
531 gen6_draw_gs(struct ilo_render *r,
532 const struct ilo_state_vector *vec,
533 struct ilo_render_draw_session *session)
534 {
535 /* 3DSTATE_CONSTANT_GS */
536 if (session->pcb_gs_changed)
537 gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
538
539 /* 3DSTATE_GS */
540 if (DIRTY(GS) || DIRTY(VS) ||
541 session->prim_changed || r->instruction_bo_changed) {
542 if (vec->gs) {
543 gen6_3DSTATE_GS(r->builder, vec->gs);
544 } else if (vec->vs &&
545 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_VS_GEN6_SO)) {
546 const int verts_per_prim = u_vertices_per_prim(session->reduced_prim);
547 gen6_so_3DSTATE_GS(r->builder, vec->vs, verts_per_prim);
548 } else {
549 gen6_disable_3DSTATE_GS(r->builder);
550 }
551 }
552 }
553
554 static bool
555 gen6_draw_update_max_svbi(struct ilo_render *r,
556 const struct ilo_state_vector *vec,
557 struct ilo_render_draw_session *session)
558 {
559 if (DIRTY(VS) || DIRTY(GS) || DIRTY(SO)) {
560 const struct pipe_stream_output_info *so_info =
561 (vec->gs) ? ilo_shader_get_kernel_so_info(vec->gs) :
562 (vec->vs) ? ilo_shader_get_kernel_so_info(vec->vs) : NULL;
563 unsigned max_svbi = 0xffffffff;
564 int i;
565
566 for (i = 0; i < so_info->num_outputs; i++) {
567 const int output_buffer = so_info->output[i].output_buffer;
568 const struct pipe_stream_output_target *so =
569 vec->so.states[output_buffer];
570 const int struct_size = so_info->stride[output_buffer] * 4;
571 const int elem_size = so_info->output[i].num_components * 4;
572 int buf_size, count;
573
574 if (!so) {
575 max_svbi = 0;
576 break;
577 }
578
579 buf_size = so->buffer_size - so_info->output[i].dst_offset * 4;
580
581 count = buf_size / struct_size;
582 if (buf_size % struct_size >= elem_size)
583 count++;
584
585 if (count < max_svbi)
586 max_svbi = count;
587 }
588
589 if (r->state.so_max_vertices != max_svbi) {
590 r->state.so_max_vertices = max_svbi;
591 return true;
592 }
593 }
594
595 return false;
596 }
597
598 static void
599 gen6_draw_gs_svbi(struct ilo_render *r,
600 const struct ilo_state_vector *vec,
601 struct ilo_render_draw_session *session)
602 {
603 const bool emit = gen6_draw_update_max_svbi(r, vec, session);
604
605 /* 3DSTATE_GS_SVB_INDEX */
606 if (emit) {
607 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
608 gen6_wa_pre_non_pipelined(r);
609
610 gen6_3DSTATE_GS_SVB_INDEX(r->builder,
611 0, 0, r->state.so_max_vertices,
612 false);
613
614 if (r->hw_ctx_changed) {
615 int i;
616
617 /*
618 * From the Sandy Bridge PRM, volume 2 part 1, page 148:
619 *
620 * "If a buffer is not enabled then the SVBI must be set to 0x0
621 * in order to not cause overflow in that SVBI."
622 *
623 * "If a buffer is not enabled then the MaxSVBI must be set to
624 * 0xFFFFFFFF in order to not cause overflow in that SVBI."
625 */
626 for (i = 1; i < 4; i++) {
627 gen6_3DSTATE_GS_SVB_INDEX(r->builder,
628 i, 0, 0xffffffff, false);
629 }
630 }
631 }
632 }
633
634 void
635 gen6_draw_clip(struct ilo_render *r,
636 const struct ilo_state_vector *vec,
637 struct ilo_render_draw_session *session)
638 {
639 /* 3DSTATE_CLIP */
640 if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(VIEWPORT) || DIRTY(FB)) {
641 bool enable_guardband = true;
642 unsigned i;
643
644 /*
645 * Gen8+ has viewport extent test. Guard band test can be enabled on
646 * prior Gens only when the viewport is larger than the framebuffer,
647 * unless we emulate viewport extent test on them.
648 */
649 if (ilo_dev_gen(r->dev) < ILO_GEN(8)) {
650 for (i = 0; i < vec->viewport.count; i++) {
651 const struct ilo_viewport_cso *vp = &vec->viewport.cso[i];
652
653 if (vp->min_x > 0.0f || vp->max_x < vec->fb.state.width ||
654 vp->min_y > 0.0f || vp->max_y < vec->fb.state.height) {
655 enable_guardband = false;
656 break;
657 }
658 }
659 }
660
661 gen6_3DSTATE_CLIP(r->builder, vec->rasterizer,
662 vec->fs, enable_guardband, 1);
663 }
664 }
665
666 static void
667 gen6_draw_sf(struct ilo_render *r,
668 const struct ilo_state_vector *vec,
669 struct ilo_render_draw_session *session)
670 {
671 /* 3DSTATE_SF */
672 if (DIRTY(RASTERIZER) || DIRTY(FS) || DIRTY(FB)) {
673 gen6_3DSTATE_SF(r->builder, vec->rasterizer, vec->fs,
674 vec->fb.num_samples);
675 }
676 }
677
678 void
679 gen6_draw_sf_rect(struct ilo_render *r,
680 const struct ilo_state_vector *vec,
681 struct ilo_render_draw_session *session)
682 {
683 /* 3DSTATE_DRAWING_RECTANGLE */
684 if (DIRTY(FB)) {
685 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
686 gen6_wa_pre_non_pipelined(r);
687
688 gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
689 vec->fb.state.width, vec->fb.state.height);
690 }
691 }
692
693 static void
694 gen6_draw_wm(struct ilo_render *r,
695 const struct ilo_state_vector *vec,
696 struct ilo_render_draw_session *session)
697 {
698 /* 3DSTATE_CONSTANT_PS */
699 if (session->pcb_fs_changed) {
700 gen6_3DSTATE_CONSTANT_PS(r->builder,
701 &r->state.wm.PUSH_CONSTANT_BUFFER,
702 &r->state.wm.PUSH_CONSTANT_BUFFER_size,
703 1);
704 }
705
706 /* 3DSTATE_WM */
707 if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) ||
708 DIRTY(RASTERIZER) || r->instruction_bo_changed) {
709 const bool dual_blend = vec->blend->dual_blend;
710 const bool cc_may_kill = (vec->dsa->dw_blend_alpha ||
711 vec->blend->alpha_to_coverage);
712
713 if (ilo_dev_gen(r->dev) == ILO_GEN(6) && r->hw_ctx_changed)
714 gen6_wa_pre_3dstate_wm_max_threads(r);
715
716 gen6_3DSTATE_WM(r->builder, vec->fs,
717 vec->rasterizer, dual_blend, cc_may_kill);
718 }
719 }
720
721 static void
722 gen6_draw_wm_multisample(struct ilo_render *r,
723 const struct ilo_state_vector *vec,
724 struct ilo_render_draw_session *session)
725 {
726 /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
727 if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
728 const uint32_t *pattern;
729
730 pattern = (vec->fb.num_samples > 1) ?
731 &r->sample_pattern_4x : &r->sample_pattern_1x;
732
733 if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
734 gen6_wa_pre_non_pipelined(r);
735 gen6_wa_pre_3dstate_multisample(r);
736 }
737
738 gen6_3DSTATE_MULTISAMPLE(r->builder,
739 vec->fb.num_samples, pattern,
740 vec->rasterizer->state.half_pixel_center);
741
742 gen6_3DSTATE_SAMPLE_MASK(r->builder,
743 (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1);
744 }
745 }
746
747 static void
748 gen6_draw_wm_depth(struct ilo_render *r,
749 const struct ilo_state_vector *vec,
750 struct ilo_render_draw_session *session)
751 {
752 /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
753 if (DIRTY(FB) || r->batch_bo_changed) {
754 const struct ilo_zs_surface *zs;
755 uint32_t clear_params;
756
757 if (vec->fb.state.zsbuf) {
758 const struct ilo_surface_cso *surface =
759 (const struct ilo_surface_cso *) vec->fb.state.zsbuf;
760 const struct ilo_texture_slice *slice =
761 ilo_texture_get_slice(ilo_texture(surface->base.texture),
762 surface->base.u.tex.level, surface->base.u.tex.first_layer);
763
764 assert(!surface->is_rt);
765
766 zs = &surface->u.zs;
767 clear_params = slice->clear_value;
768 }
769 else {
770 zs = &vec->fb.null_zs;
771 clear_params = 0;
772 }
773
774 if (ilo_dev_gen(r->dev) == ILO_GEN(6)) {
775 gen6_wa_pre_non_pipelined(r);
776 gen6_wa_pre_depth(r);
777 }
778
779 gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs, false);
780 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
781 gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
782 gen6_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
783 }
784 }
785
786 void
787 gen6_draw_wm_raster(struct ilo_render *r,
788 const struct ilo_state_vector *vec,
789 struct ilo_render_draw_session *session)
790 {
791 /* 3DSTATE_POLY_STIPPLE_PATTERN and 3DSTATE_POLY_STIPPLE_OFFSET */
792 if ((DIRTY(RASTERIZER) || DIRTY(POLY_STIPPLE)) &&
793 vec->rasterizer->state.poly_stipple_enable) {
794 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
795 gen6_wa_pre_non_pipelined(r);
796
797 gen6_3DSTATE_POLY_STIPPLE_PATTERN(r->builder,
798 &vec->poly_stipple);
799
800 gen6_3DSTATE_POLY_STIPPLE_OFFSET(r->builder, 0, 0);
801 }
802
803 /* 3DSTATE_LINE_STIPPLE */
804 if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_stipple_enable) {
805 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
806 gen6_wa_pre_non_pipelined(r);
807
808 gen6_3DSTATE_LINE_STIPPLE(r->builder,
809 vec->rasterizer->state.line_stipple_pattern,
810 vec->rasterizer->state.line_stipple_factor + 1);
811 }
812
813 /* 3DSTATE_AA_LINE_PARAMETERS */
814 if (DIRTY(RASTERIZER) && vec->rasterizer->state.line_smooth) {
815 if (ilo_dev_gen(r->dev) == ILO_GEN(6))
816 gen6_wa_pre_non_pipelined(r);
817
818 gen6_3DSTATE_AA_LINE_PARAMETERS(r->builder);
819 }
820 }
821
822 #undef DIRTY
823
824 void
825 ilo_render_emit_draw_commands_gen6(struct ilo_render *render,
826 const struct ilo_state_vector *vec,
827 struct ilo_render_draw_session *session)
828 {
829 ILO_DEV_ASSERT(render->dev, 6, 6);
830
831 /*
832 * We try to keep the order of the commands match, as closely as possible,
833 * that of the classic i965 driver. It allows us to compare the command
834 * streams easily.
835 */
836 gen6_draw_common_select(render, vec, session);
837 gen6_draw_gs_svbi(render, vec, session);
838 gen6_draw_common_sip(render, vec, session);
839 gen6_draw_vf_statistics(render, vec, session);
840 gen6_draw_common_base_address(render, vec, session);
841 gen6_draw_common_pointers_1(render, vec, session);
842 gen6_draw_common_urb(render, vec, session);
843 gen6_draw_common_pointers_2(render, vec, session);
844 gen6_draw_wm_multisample(render, vec, session);
845 gen6_draw_vs(render, vec, session);
846 gen6_draw_gs(render, vec, session);
847 gen6_draw_clip(render, vec, session);
848 gen6_draw_sf(render, vec, session);
849 gen6_draw_wm(render, vec, session);
850 gen6_draw_common_pointers_3(render, vec, session);
851 gen6_draw_wm_depth(render, vec, session);
852 gen6_draw_wm_raster(render, vec, session);
853 gen6_draw_sf_rect(render, vec, session);
854 gen6_draw_vf(render, vec, session);
855
856 gen6_3dprimitive(render, vec->draw, &vec->ib);
857 }
858
859 static void
860 gen6_rectlist_vs_to_sf(struct ilo_render *r,
861 const struct ilo_blitter *blitter)
862 {
863 gen6_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
864 gen6_disable_3DSTATE_VS(r->builder);
865
866 gen6_wa_post_3dstate_constant_vs(r);
867
868 gen6_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
869 gen6_disable_3DSTATE_GS(r->builder);
870
871 gen6_disable_3DSTATE_CLIP(r->builder);
872 gen6_3DSTATE_SF(r->builder, NULL, NULL, blitter->fb.num_samples);
873 }
874
875 static void
876 gen6_rectlist_wm(struct ilo_render *r,
877 const struct ilo_blitter *blitter)
878 {
879 uint32_t hiz_op;
880
881 switch (blitter->op) {
882 case ILO_BLITTER_RECTLIST_CLEAR_ZS:
883 hiz_op = GEN6_WM_DW4_DEPTH_CLEAR;
884 break;
885 case ILO_BLITTER_RECTLIST_RESOLVE_Z:
886 hiz_op = GEN6_WM_DW4_DEPTH_RESOLVE;
887 break;
888 case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
889 hiz_op = GEN6_WM_DW4_HIZ_RESOLVE;
890 break;
891 default:
892 hiz_op = 0;
893 break;
894 }
895
896 gen6_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
897
898 gen6_wa_pre_3dstate_wm_max_threads(r);
899 gen6_hiz_3DSTATE_WM(r->builder, hiz_op);
900 }
901
902 static void
903 gen6_rectlist_wm_depth(struct ilo_render *r,
904 const struct ilo_blitter *blitter)
905 {
906 gen6_wa_pre_depth(r);
907
908 if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
909 ILO_BLITTER_USE_FB_STENCIL)) {
910 gen6_3DSTATE_DEPTH_BUFFER(r->builder,
911 &blitter->fb.dst.u.zs, true);
912 }
913
914 if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
915 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
916 &blitter->fb.dst.u.zs);
917 }
918
919 if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
920 gen6_3DSTATE_STENCIL_BUFFER(r->builder,
921 &blitter->fb.dst.u.zs);
922 }
923
924 gen6_3DSTATE_CLEAR_PARAMS(r->builder,
925 blitter->depth_clear_value);
926 }
927
928 static void
929 gen6_rectlist_wm_multisample(struct ilo_render *r,
930 const struct ilo_blitter *blitter)
931 {
932 const uint32_t *pattern = (blitter->fb.num_samples > 1) ?
933 &r->sample_pattern_4x : &r->sample_pattern_1x;
934
935 gen6_wa_pre_3dstate_multisample(r);
936
937 gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
938 pattern, true);
939
940 gen6_3DSTATE_SAMPLE_MASK(r->builder,
941 (1 << blitter->fb.num_samples) - 1);
942 }
943
944 int
945 ilo_render_get_rectlist_commands_len_gen6(const struct ilo_render *render,
946 const struct ilo_blitter *blitter)
947 {
948 ILO_DEV_ASSERT(render->dev, 6, 7.5);
949
950 return 256;
951 }
952
953 void
954 ilo_render_emit_rectlist_commands_gen6(struct ilo_render *r,
955 const struct ilo_blitter *blitter,
956 const struct ilo_render_rectlist_session *session)
957 {
958 ILO_DEV_ASSERT(r->dev, 6, 6);
959
960 gen6_wa_pre_non_pipelined(r);
961
962 gen6_rectlist_wm_multisample(r, blitter);
963
964 gen6_state_base_address(r->builder, true);
965
966 gen6_user_3DSTATE_VERTEX_BUFFERS(r->builder,
967 session->vb_start, session->vb_end,
968 sizeof(blitter->vertices[0]));
969
970 gen6_3DSTATE_VERTEX_ELEMENTS(r->builder, &blitter->ve);
971
972 gen6_3DSTATE_URB(r->builder, r->dev->urb_size, 0,
973 (blitter->ve.count + blitter->ve.prepend_nosrc_cso) * 4 * sizeof(float),
974 0);
975
976 /* 3DSTATE_URB workaround */
977 if (r->state.gs.active) {
978 ilo_render_emit_flush(r);
979 r->state.gs.active = false;
980 }
981
982 if (blitter->uses &
983 (ILO_BLITTER_USE_DSA | ILO_BLITTER_USE_CC)) {
984 gen6_3DSTATE_CC_STATE_POINTERS(r->builder, 0,
985 r->state.DEPTH_STENCIL_STATE, r->state.COLOR_CALC_STATE);
986 }
987
988 gen6_rectlist_vs_to_sf(r, blitter);
989 gen6_rectlist_wm(r, blitter);
990
991 if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
992 gen6_3DSTATE_VIEWPORT_STATE_POINTERS(r->builder,
993 0, 0, r->state.CC_VIEWPORT);
994 }
995
996 gen6_rectlist_wm_depth(r, blitter);
997
998 gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
999 blitter->fb.width, blitter->fb.height);
1000
1001 gen6_3dprimitive(r, &blitter->draw, NULL);
1002 }
1003
1004 int
1005 ilo_render_get_draw_commands_len_gen6(const struct ilo_render *render,
1006 const struct ilo_state_vector *vec)
1007 {
1008 static int len;
1009
1010 ILO_DEV_ASSERT(render->dev, 6, 6);
1011
1012 if (!len) {
1013 len += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 3;
1014 len += GEN6_3DSTATE_GS_SVB_INDEX__SIZE * 4;
1015 len += GEN6_PIPE_CONTROL__SIZE * 5;
1016
1017 len +=
1018 GEN6_STATE_BASE_ADDRESS__SIZE +
1019 GEN6_STATE_SIP__SIZE +
1020 GEN6_3DSTATE_VF_STATISTICS__SIZE +
1021 GEN6_PIPELINE_SELECT__SIZE +
1022 GEN6_3DSTATE_BINDING_TABLE_POINTERS__SIZE +
1023 GEN6_3DSTATE_SAMPLER_STATE_POINTERS__SIZE +
1024 GEN6_3DSTATE_URB__SIZE +
1025 GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
1026 GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
1027 GEN6_3DSTATE_INDEX_BUFFER__SIZE +
1028 GEN6_3DSTATE_VIEWPORT_STATE_POINTERS__SIZE +
1029 GEN6_3DSTATE_CC_STATE_POINTERS__SIZE +
1030 GEN6_3DSTATE_SCISSOR_STATE_POINTERS__SIZE +
1031 GEN6_3DSTATE_VS__SIZE +
1032 GEN6_3DSTATE_GS__SIZE +
1033 GEN6_3DSTATE_CLIP__SIZE +
1034 GEN6_3DSTATE_SF__SIZE +
1035 GEN6_3DSTATE_WM__SIZE +
1036 GEN6_3DSTATE_SAMPLE_MASK__SIZE +
1037 GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
1038 GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
1039 GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
1040 GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
1041 GEN6_3DSTATE_LINE_STIPPLE__SIZE +
1042 GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
1043 GEN6_3DSTATE_MULTISAMPLE__SIZE +
1044 GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
1045 GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
1046 GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
1047 GEN6_3DPRIMITIVE__SIZE;
1048 }
1049
1050 return len;
1051 }