ilo: make ilo_render_emit_draw() direct
[mesa.git] / src / gallium / drivers / ilo / ilo_render_gen7.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "genhw/genhw.h"
29 #include "util/u_dual_blend.h"
30
31 #include "ilo_blitter.h"
32 #include "ilo_builder_3d.h"
33 #include "ilo_builder_render.h"
34 #include "ilo_shader.h"
35 #include "ilo_state.h"
36 #include "ilo_render.h"
37 #include "ilo_render_gen.h"
38
39 /**
40 * A wrapper for gen6_PIPE_CONTROL().
41 */
42 static inline void
43 gen7_pipe_control(struct ilo_render *r, uint32_t dw1)
44 {
45 struct intel_bo *bo = (dw1 & GEN6_PIPE_CONTROL_WRITE__MASK) ?
46 r->workaround_bo : NULL;
47
48 ILO_DEV_ASSERT(r->dev, 7, 7.5);
49
50 if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) {
51 /* CS stall cannot be set alone */
52 const uint32_t mask = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
53 GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
54 GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL |
55 GEN6_PIPE_CONTROL_DEPTH_STALL |
56 GEN6_PIPE_CONTROL_WRITE__MASK;
57 if (!(dw1 & mask))
58 dw1 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
59 }
60
61 gen6_PIPE_CONTROL(r->builder, dw1, bo, 0, false);
62
63
64 r->state.current_pipe_control_dw1 |= dw1;
65 r->state.deferred_pipe_control_dw1 &= ~dw1;
66 }
67
68 static void
69 gen7_wa_post_3dstate_push_constant_alloc_ps(struct ilo_render *r)
70 {
71 /*
72 * From the Ivy Bridge PRM, volume 2 part 1, page 292:
73 *
74 * "A PIPE_CONTOL command with the CS Stall bit set must be programmed
75 * in the ring after this instruction
76 * (3DSTATE_PUSH_CONSTANT_ALLOC_PS)."
77 */
78 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
79
80 ILO_DEV_ASSERT(r->dev, 7, 7.5);
81
82 r->state.deferred_pipe_control_dw1 |= dw1;
83 }
84
85 static void
86 gen7_wa_pre_vs(struct ilo_render *r)
87 {
88 /*
89 * From the Ivy Bridge PRM, volume 2 part 1, page 106:
90 *
91 * "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
92 * needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
93 * 3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
94 * 3DSTATE_SAMPLER_STATE_POINTER_VS command. Only one PIPE_CONTROL
95 * needs to be sent before any combination of VS associated 3DSTATE."
96 */
97 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL |
98 GEN6_PIPE_CONTROL_WRITE_IMM;
99
100 ILO_DEV_ASSERT(r->dev, 7, 7.5);
101
102 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
103 gen7_pipe_control(r, dw1);
104 }
105
106 static void
107 gen7_wa_pre_3dstate_sf_depth_bias(struct ilo_render *r)
108 {
109 /*
110 * From the Ivy Bridge PRM, volume 2 part 1, page 258:
111 *
112 * "Due to an HW issue driver needs to send a pipe control with stall
113 * when ever there is state change in depth bias related state (in
114 * 3DSTATE_SF)"
115 */
116 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL;
117
118 ILO_DEV_ASSERT(r->dev, 7, 7.5);
119
120 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
121 gen7_pipe_control(r, dw1);
122 }
123
124 static void
125 gen7_wa_pre_3dstate_multisample(struct ilo_render *r)
126 {
127 /*
128 * From the Ivy Bridge PRM, volume 2 part 1, page 304:
129 *
130 * "Driver must ierarchi that all the caches in the depth pipe are
131 * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This
132 * requires driver to send a PIPE_CONTROL with a CS stall along with a
133 * Depth Flush prior to this command.
134 */
135 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH |
136 GEN6_PIPE_CONTROL_CS_STALL;
137
138 ILO_DEV_ASSERT(r->dev, 7, 7.5);
139
140 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
141 gen7_pipe_control(r, dw1);
142 }
143
144 static void
145 gen7_wa_pre_depth(struct ilo_render *r)
146 {
147 /*
148 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
149 *
150 * "Driver must send a least one PIPE_CONTROL command with CS Stall and
151 * a post sync operation prior to the group of depth
152 * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
153 * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)."
154 */
155 const uint32_t dw1 = GEN6_PIPE_CONTROL_CS_STALL |
156 GEN6_PIPE_CONTROL_WRITE_IMM;
157
158 ILO_DEV_ASSERT(r->dev, 7, 7.5);
159
160 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
161 gen7_pipe_control(r, dw1);
162
163 /*
164 * From the Ivy Bridge PRM, volume 2 part 1, page 315:
165 *
166 * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e.,
167 * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS,
168 * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first
169 * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit
170 * set), followed by a pipelined depth cache flush (PIPE_CONTROL with
171 * Depth Flush Bit set, followed by another pipelined depth stall
172 * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise
173 * guarantee that the pipeline from WM onwards is already flushed
174 * (e.g., via a preceding MI_FLUSH)."
175 */
176 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
177 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH);
178 gen7_pipe_control(r, GEN6_PIPE_CONTROL_DEPTH_STALL);
179 }
180
181 static void
182 gen7_wa_pre_3dstate_ps_max_threads(struct ilo_render *r)
183 {
184 /*
185 * From the Ivy Bridge PRM, volume 2 part 1, page 286:
186 *
187 * "If this field (Maximum Number of Threads in 3DSTATE_PS) is changed
188 * between 3DPRIMITIVE commands, a PIPE_CONTROL command with Stall at
189 * Pixel Scoreboard set is required to be issued."
190 */
191 const uint32_t dw1 = GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL;
192
193 ILO_DEV_ASSERT(r->dev, 7, 7.5);
194
195 if ((r->state.current_pipe_control_dw1 & dw1) != dw1)
196 gen7_pipe_control(r, dw1);
197 }
198
199 static void
200 gen7_wa_post_ps_and_later(struct ilo_render *r)
201 {
202 /*
203 * From the Ivy Bridge PRM, volume 2 part 1, page 276:
204 *
205 * "The driver must make sure a PIPE_CONTROL with the Depth Stall
206 * Enable bit set after all the following states are programmed:
207 *
208 * - 3DSTATE_PS
209 * - 3DSTATE_VIEWPORT_STATE_POINTERS_CC
210 * - 3DSTATE_CONSTANT_PS
211 * - 3DSTATE_BINDING_TABLE_POINTERS_PS
212 * - 3DSTATE_SAMPLER_STATE_POINTERS_PS
213 * - 3DSTATE_CC_STATE_POINTERS
214 * - 3DSTATE_BLEND_STATE_POINTERS
215 * - 3DSTATE_DEPTH_STENCIL_STATE_POINTERS"
216 */
217 const uint32_t dw1 = GEN6_PIPE_CONTROL_DEPTH_STALL;
218
219 ILO_DEV_ASSERT(r->dev, 7, 7.5);
220
221 r->state.deferred_pipe_control_dw1 |= dw1;
222 }
223
/*
 * Test whether ILO_DIRTY_<state> is set in the pipe_dirty mask of the draw
 * session.  A variable named "session" must be in scope where it is used.
 */
#define DIRTY(state) ((session->pipe_dirty) & (ILO_DIRTY_ ## state))
225
226 static void
227 gen7_draw_common_urb(struct ilo_render *r,
228 const struct ilo_state_vector *vec,
229 struct gen6_draw_session *session)
230 {
231 /* 3DSTATE_URB_{VS,GS,HS,DS} */
232 if (DIRTY(VE) || DIRTY(VS)) {
233 /* the first 16KB are reserved for VS and PS PCBs */
234 const int offset = (ilo_dev_gen(r->dev) == ILO_GEN(7.5) &&
235 r->dev->gt == 3) ? 32768 : 16384;
236 int vs_entry_size, vs_total_size;
237
238 vs_entry_size = (vec->vs) ?
239 ilo_shader_get_kernel_param(vec->vs, ILO_KERNEL_OUTPUT_COUNT) : 0;
240
241 /*
242 * From the Ivy Bridge PRM, volume 2 part 1, page 35:
243 *
244 * "Programming Restriction: As the VS URB entry serves as both the
245 * per-vertex input and output of the VS shader, the VS URB
246 * Allocation Size must be sized to the maximum of the vertex input
247 * and output structures."
248 */
249 if (vs_entry_size < vec->ve->count)
250 vs_entry_size = vec->ve->count;
251
252 vs_entry_size *= sizeof(float) * 4;
253 vs_total_size = r->dev->urb_size - offset;
254
255 gen7_wa_pre_vs(r);
256
257 gen7_3DSTATE_URB_VS(r->builder,
258 offset, vs_total_size, vs_entry_size);
259
260 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
261 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
262 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
263 }
264 }
265
266 static void
267 gen7_draw_common_pcb_alloc(struct ilo_render *r,
268 const struct ilo_state_vector *vec,
269 struct gen6_draw_session *session)
270 {
271 /* 3DSTATE_PUSH_CONSTANT_ALLOC_{VS,PS} */
272 if (r->hw_ctx_changed) {
273 /*
274 * Push constant buffers are only allowed to take up at most the first
275 * 16KB of the URB. Split the space evenly for VS and FS.
276 */
277 const int max_size = (ilo_dev_gen(r->dev) == ILO_GEN(7.5) &&
278 r->dev->gt == 3) ? 32768 : 16384;
279 const int size = max_size / 2;
280 int offset = 0;
281
282 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
283 offset += size;
284
285 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
286
287 if (ilo_dev_gen(r->dev) == ILO_GEN(7))
288 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
289 }
290 }
291
292 static void
293 gen7_draw_common_pointers_1(struct ilo_render *r,
294 const struct ilo_state_vector *vec,
295 struct gen6_draw_session *session)
296 {
297 /* 3DSTATE_VIEWPORT_STATE_POINTERS_{CC,SF_CLIP} */
298 if (session->viewport_changed) {
299 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
300 r->state.CC_VIEWPORT);
301
302 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(r->builder,
303 r->state.SF_CLIP_VIEWPORT);
304 }
305 }
306
307 static void
308 gen7_draw_common_pointers_2(struct ilo_render *r,
309 const struct ilo_state_vector *vec,
310 struct gen6_draw_session *session)
311 {
312 /* 3DSTATE_BLEND_STATE_POINTERS */
313 if (session->blend_changed) {
314 gen7_3DSTATE_BLEND_STATE_POINTERS(r->builder,
315 r->state.BLEND_STATE);
316 }
317
318 /* 3DSTATE_CC_STATE_POINTERS */
319 if (session->cc_changed) {
320 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
321 r->state.COLOR_CALC_STATE);
322 }
323
324 /* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS */
325 if (session->dsa_changed) {
326 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
327 r->state.DEPTH_STENCIL_STATE);
328 }
329 }
330
331 static void
332 gen7_draw_vs(struct ilo_render *r,
333 const struct ilo_state_vector *vec,
334 struct gen6_draw_session *session)
335 {
336 const bool emit_3dstate_binding_table =
337 session->binding_table_vs_changed;
338 const bool emit_3dstate_sampler_state =
339 session->sampler_vs_changed;
340 /* see gen6_draw_vs() */
341 const bool emit_3dstate_constant_vs = session->pcb_vs_changed;
342 const bool emit_3dstate_vs = (DIRTY(VS) || DIRTY(SAMPLER_VS) ||
343 r->instruction_bo_changed);
344
345 /* emit depth stall before any of the VS commands */
346 if (emit_3dstate_binding_table || emit_3dstate_sampler_state ||
347 emit_3dstate_constant_vs || emit_3dstate_vs)
348 gen7_wa_pre_vs(r);
349
350 /* 3DSTATE_BINDING_TABLE_POINTERS_VS */
351 if (emit_3dstate_binding_table) {
352 gen7_3DSTATE_BINDING_TABLE_POINTERS_VS(r->builder,
353 r->state.vs.BINDING_TABLE_STATE);
354 }
355
356 /* 3DSTATE_SAMPLER_STATE_POINTERS_VS */
357 if (emit_3dstate_sampler_state) {
358 gen7_3DSTATE_SAMPLER_STATE_POINTERS_VS(r->builder,
359 r->state.vs.SAMPLER_STATE);
360 }
361
362 /* 3DSTATE_CONSTANT_VS */
363 if (emit_3dstate_constant_vs) {
364 gen7_3DSTATE_CONSTANT_VS(r->builder,
365 &r->state.vs.PUSH_CONSTANT_BUFFER,
366 &r->state.vs.PUSH_CONSTANT_BUFFER_size,
367 1);
368 }
369
370 /* 3DSTATE_VS */
371 if (emit_3dstate_vs) {
372 const int num_samplers = vec->sampler[PIPE_SHADER_VERTEX].count;
373
374 gen6_3DSTATE_VS(r->builder, vec->vs, num_samplers);
375 }
376 }
377
378 static void
379 gen7_draw_hs(struct ilo_render *r,
380 const struct ilo_state_vector *vec,
381 struct gen6_draw_session *session)
382 {
383 /* 3DSTATE_CONSTANT_HS and 3DSTATE_HS */
384 if (r->hw_ctx_changed) {
385 gen7_3DSTATE_CONSTANT_HS(r->builder, 0, 0, 0);
386 gen7_3DSTATE_HS(r->builder, NULL, 0);
387 }
388
389 /* 3DSTATE_BINDING_TABLE_POINTERS_HS */
390 if (r->hw_ctx_changed)
391 gen7_3DSTATE_BINDING_TABLE_POINTERS_HS(r->builder, 0);
392 }
393
394 static void
395 gen7_draw_te(struct ilo_render *r,
396 const struct ilo_state_vector *vec,
397 struct gen6_draw_session *session)
398 {
399 /* 3DSTATE_TE */
400 if (r->hw_ctx_changed)
401 gen7_3DSTATE_TE(r->builder);
402 }
403
404 static void
405 gen7_draw_ds(struct ilo_render *r,
406 const struct ilo_state_vector *vec,
407 struct gen6_draw_session *session)
408 {
409 /* 3DSTATE_CONSTANT_DS and 3DSTATE_DS */
410 if (r->hw_ctx_changed) {
411 gen7_3DSTATE_CONSTANT_DS(r->builder, 0, 0, 0);
412 gen7_3DSTATE_DS(r->builder, NULL, 0);
413 }
414
415 /* 3DSTATE_BINDING_TABLE_POINTERS_DS */
416 if (r->hw_ctx_changed)
417 gen7_3DSTATE_BINDING_TABLE_POINTERS_DS(r->builder, 0);
418
419 }
420
421 static void
422 gen7_draw_gs(struct ilo_render *r,
423 const struct ilo_state_vector *vec,
424 struct gen6_draw_session *session)
425 {
426 /* 3DSTATE_CONSTANT_GS and 3DSTATE_GS */
427 if (r->hw_ctx_changed) {
428 gen7_3DSTATE_CONSTANT_GS(r->builder, 0, 0, 0);
429 gen7_3DSTATE_GS(r->builder, NULL, 0);
430 }
431
432 /* 3DSTATE_BINDING_TABLE_POINTERS_GS */
433 if (session->binding_table_gs_changed) {
434 gen7_3DSTATE_BINDING_TABLE_POINTERS_GS(r->builder,
435 r->state.gs.BINDING_TABLE_STATE);
436 }
437 }
438
439 static void
440 gen7_draw_sol(struct ilo_render *r,
441 const struct ilo_state_vector *vec,
442 struct gen6_draw_session *session)
443 {
444 const struct pipe_stream_output_info *so_info;
445 const struct ilo_shader_state *shader;
446 bool dirty_sh = false;
447
448 if (vec->gs) {
449 shader = vec->gs;
450 dirty_sh = DIRTY(GS);
451 }
452 else {
453 shader = vec->vs;
454 dirty_sh = DIRTY(VS);
455 }
456
457 so_info = ilo_shader_get_kernel_so_info(shader);
458
459 /* 3DSTATE_SO_BUFFER */
460 if ((DIRTY(SO) || dirty_sh || r->batch_bo_changed) &&
461 vec->so.enabled) {
462 int i;
463
464 for (i = 0; i < vec->so.count; i++) {
465 const int stride = so_info->stride[i] * 4; /* in bytes */
466 int base = 0;
467
468 gen7_3DSTATE_SO_BUFFER(r->builder, i, base, stride,
469 vec->so.states[i]);
470 }
471
472 for (; i < 4; i++)
473 gen7_3DSTATE_SO_BUFFER(r->builder, i, 0, 0, NULL);
474 }
475
476 /* 3DSTATE_SO_DECL_LIST */
477 if (dirty_sh && vec->so.enabled)
478 gen7_3DSTATE_SO_DECL_LIST(r->builder, so_info);
479
480 /* 3DSTATE_STREAMOUT */
481 if (DIRTY(SO) || DIRTY(RASTERIZER) || dirty_sh) {
482 const unsigned buffer_mask = (1 << vec->so.count) - 1;
483 const int output_count = ilo_shader_get_kernel_param(shader,
484 ILO_KERNEL_OUTPUT_COUNT);
485
486 gen7_3DSTATE_STREAMOUT(r->builder, buffer_mask, output_count,
487 vec->rasterizer->state.rasterizer_discard);
488 }
489 }
490
491 static void
492 gen7_draw_sf(struct ilo_render *r,
493 const struct ilo_state_vector *vec,
494 struct gen6_draw_session *session)
495 {
496 /* 3DSTATE_SBE */
497 if (DIRTY(RASTERIZER) || DIRTY(FS))
498 gen7_3DSTATE_SBE(r->builder, vec->rasterizer, vec->fs);
499
500 /* 3DSTATE_SF */
501 if (DIRTY(RASTERIZER) || DIRTY(FB)) {
502 struct pipe_surface *zs = vec->fb.state.zsbuf;
503
504 gen7_wa_pre_3dstate_sf_depth_bias(r);
505 gen7_3DSTATE_SF(r->builder, vec->rasterizer,
506 (zs) ? zs->format : PIPE_FORMAT_NONE);
507 }
508 }
509
510 static void
511 gen7_draw_wm(struct ilo_render *r,
512 const struct ilo_state_vector *vec,
513 struct gen6_draw_session *session)
514 {
515 /* 3DSTATE_WM */
516 if (DIRTY(FS) || DIRTY(BLEND) || DIRTY(DSA) || DIRTY(RASTERIZER)) {
517 const bool cc_may_kill = (vec->dsa->dw_alpha ||
518 vec->blend->alpha_to_coverage);
519
520 gen7_3DSTATE_WM(r->builder, vec->fs,
521 vec->rasterizer, cc_may_kill, 0);
522 }
523
524 /* 3DSTATE_BINDING_TABLE_POINTERS_PS */
525 if (session->binding_table_fs_changed) {
526 gen7_3DSTATE_BINDING_TABLE_POINTERS_PS(r->builder,
527 r->state.wm.BINDING_TABLE_STATE);
528 }
529
530 /* 3DSTATE_SAMPLER_STATE_POINTERS_PS */
531 if (session->sampler_fs_changed) {
532 gen7_3DSTATE_SAMPLER_STATE_POINTERS_PS(r->builder,
533 r->state.wm.SAMPLER_STATE);
534 }
535
536 /* 3DSTATE_CONSTANT_PS */
537 if (session->pcb_fs_changed) {
538 gen7_3DSTATE_CONSTANT_PS(r->builder,
539 &r->state.wm.PUSH_CONSTANT_BUFFER,
540 &r->state.wm.PUSH_CONSTANT_BUFFER_size,
541 1);
542 }
543
544 /* 3DSTATE_PS */
545 if (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND) ||
546 r->instruction_bo_changed) {
547 const int num_samplers = vec->sampler[PIPE_SHADER_FRAGMENT].count;
548 const bool dual_blend = vec->blend->dual_blend;
549
550 if ((ilo_dev_gen(r->dev) == ILO_GEN(7) ||
551 ilo_dev_gen(r->dev) == ILO_GEN(7.5)) &&
552 r->hw_ctx_changed)
553 gen7_wa_pre_3dstate_ps_max_threads(r);
554
555 gen7_3DSTATE_PS(r->builder, vec->fs, num_samplers, dual_blend);
556 }
557
558 /* 3DSTATE_SCISSOR_STATE_POINTERS */
559 if (session->scissor_changed) {
560 gen6_3DSTATE_SCISSOR_STATE_POINTERS(r->builder,
561 r->state.SCISSOR_RECT);
562 }
563
564 /* XXX what is the best way to know if this workaround is needed? */
565 {
566 const bool emit_3dstate_ps =
567 (DIRTY(FS) || DIRTY(SAMPLER_FS) || DIRTY(BLEND));
568 const bool emit_3dstate_depth_buffer =
569 (DIRTY(FB) || DIRTY(DSA) || r->state_bo_changed);
570
571 if (emit_3dstate_ps ||
572 session->pcb_fs_changed ||
573 session->viewport_changed ||
574 session->binding_table_fs_changed ||
575 session->sampler_fs_changed ||
576 session->cc_changed ||
577 session->blend_changed ||
578 session->dsa_changed)
579 gen7_wa_post_ps_and_later(r);
580
581 if (emit_3dstate_depth_buffer)
582 gen7_wa_pre_depth(r);
583 }
584
585 /* 3DSTATE_DEPTH_BUFFER and 3DSTATE_CLEAR_PARAMS */
586 if (DIRTY(FB) || r->batch_bo_changed) {
587 const struct ilo_zs_surface *zs;
588 uint32_t clear_params;
589
590 if (vec->fb.state.zsbuf) {
591 const struct ilo_surface_cso *surface =
592 (const struct ilo_surface_cso *) vec->fb.state.zsbuf;
593 const struct ilo_texture_slice *slice =
594 ilo_texture_get_slice(ilo_texture(surface->base.texture),
595 surface->base.u.tex.level, surface->base.u.tex.first_layer);
596
597 assert(!surface->is_rt);
598 zs = &surface->u.zs;
599 clear_params = slice->clear_value;
600 }
601 else {
602 zs = &vec->fb.null_zs;
603 clear_params = 0;
604 }
605
606 gen6_3DSTATE_DEPTH_BUFFER(r->builder, zs);
607 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder, zs);
608 gen6_3DSTATE_STENCIL_BUFFER(r->builder, zs);
609 gen7_3DSTATE_CLEAR_PARAMS(r->builder, clear_params);
610 }
611 }
612
613 static void
614 gen7_draw_wm_multisample(struct ilo_render *r,
615 const struct ilo_state_vector *vec,
616 struct gen6_draw_session *session)
617 {
618 /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */
619 if (DIRTY(SAMPLE_MASK) || DIRTY(FB)) {
620 const uint32_t *packed_sample_pos;
621
622 gen7_wa_pre_3dstate_multisample(r);
623
624 packed_sample_pos =
625 (vec->fb.num_samples > 4) ? r->packed_sample_position_8x :
626 (vec->fb.num_samples > 1) ? &r->packed_sample_position_4x :
627 &r->packed_sample_position_1x;
628
629 gen6_3DSTATE_MULTISAMPLE(r->builder,
630 vec->fb.num_samples, packed_sample_pos,
631 vec->rasterizer->state.half_pixel_center);
632
633 gen7_3DSTATE_SAMPLE_MASK(r->builder,
634 (vec->fb.num_samples > 1) ? vec->sample_mask : 0x1,
635 vec->fb.num_samples);
636 }
637 }
638
639 static void
640 gen7_draw_vf_draw(struct ilo_render *r,
641 const struct ilo_state_vector *vec,
642 struct gen6_draw_session *session)
643 {
644 if (r->state.deferred_pipe_control_dw1)
645 gen7_pipe_control(r, r->state.deferred_pipe_control_dw1);
646
647 /* 3DPRIMITIVE */
648 gen7_3DPRIMITIVE(r->builder, vec->draw, &vec->ib);
649
650 r->state.current_pipe_control_dw1 = 0;
651 r->state.deferred_pipe_control_dw1 = 0;
652 }
653
654 void
655 ilo_render_emit_draw_commands_gen7(struct ilo_render *render,
656 const struct ilo_state_vector *vec,
657 struct gen6_draw_session *session)
658 {
659 ILO_DEV_ASSERT(render->dev, 7, 7.5);
660
661 /*
662 * We try to keep the order of the commands match, as closely as possible,
663 * that of the classic i965 driver. It allows us to compare the command
664 * streams easily.
665 */
666 gen6_draw_common_select(render, vec, session);
667 gen6_draw_common_sip(render, vec, session);
668 gen6_draw_vf_statistics(render, vec, session);
669 gen7_draw_common_pcb_alloc(render, vec, session);
670 gen6_draw_common_base_address(render, vec, session);
671 gen7_draw_common_pointers_1(render, vec, session);
672 gen7_draw_common_urb(render, vec, session);
673 gen7_draw_common_pointers_2(render, vec, session);
674 gen7_draw_wm_multisample(render, vec, session);
675 gen7_draw_gs(render, vec, session);
676 gen7_draw_hs(render, vec, session);
677 gen7_draw_te(render, vec, session);
678 gen7_draw_ds(render, vec, session);
679 gen7_draw_vs(render, vec, session);
680 gen7_draw_sol(render, vec, session);
681 gen6_draw_clip(render, vec, session);
682 gen7_draw_sf(render, vec, session);
683 gen7_draw_wm(render, vec, session);
684 gen6_draw_wm_raster(render, vec, session);
685 gen6_draw_sf_rect(render, vec, session);
686 gen6_draw_vf(render, vec, session);
687 gen7_draw_vf_draw(render, vec, session);
688 }
689
690 static void
691 gen7_rectlist_pcb_alloc(struct ilo_render *r,
692 const struct ilo_blitter *blitter)
693 {
694 /*
695 * Push constant buffers are only allowed to take up at most the first
696 * 16KB of the URB. Split the space evenly for VS and FS.
697 */
698 const int max_size =
699 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 32768 : 16384;
700 const int size = max_size / 2;
701 int offset = 0;
702
703 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_VS(r->builder, offset, size);
704 offset += size;
705
706 gen7_3DSTATE_PUSH_CONSTANT_ALLOC_PS(r->builder, offset, size);
707
708 gen7_wa_post_3dstate_push_constant_alloc_ps(r);
709 }
710
711 static void
712 gen7_rectlist_urb(struct ilo_render *r,
713 const struct ilo_blitter *blitter)
714 {
715 /* the first 16KB are reserved for VS and PS PCBs */
716 const int offset =
717 (ilo_dev_gen(r->dev) == ILO_GEN(7.5) && r->dev->gt == 3) ? 32768 : 16384;
718
719 gen7_3DSTATE_URB_VS(r->builder, offset, r->dev->urb_size - offset,
720 blitter->ve.count * 4 * sizeof(float));
721
722 gen7_3DSTATE_URB_GS(r->builder, offset, 0, 0);
723 gen7_3DSTATE_URB_HS(r->builder, offset, 0, 0);
724 gen7_3DSTATE_URB_DS(r->builder, offset, 0, 0);
725 }
726
727 static void
728 gen7_rectlist_vs_to_sf(struct ilo_render *r,
729 const struct ilo_blitter *blitter)
730 {
731 gen7_3DSTATE_CONSTANT_VS(r->builder, NULL, NULL, 0);
732 gen6_3DSTATE_VS(r->builder, NULL, 0);
733
734 gen7_3DSTATE_CONSTANT_HS(r->builder, NULL, NULL, 0);
735 gen7_3DSTATE_HS(r->builder, NULL, 0);
736
737 gen7_3DSTATE_TE(r->builder);
738
739 gen7_3DSTATE_CONSTANT_DS(r->builder, NULL, NULL, 0);
740 gen7_3DSTATE_DS(r->builder, NULL, 0);
741
742 gen7_3DSTATE_CONSTANT_GS(r->builder, NULL, NULL, 0);
743 gen7_3DSTATE_GS(r->builder, NULL, 0);
744
745 gen7_3DSTATE_STREAMOUT(r->builder, 0x0, 0, false);
746
747 gen6_3DSTATE_CLIP(r->builder, NULL, NULL, false, 0);
748
749 gen7_wa_pre_3dstate_sf_depth_bias(r);
750
751 gen7_3DSTATE_SF(r->builder, NULL, blitter->fb.dst.base.format);
752 gen7_3DSTATE_SBE(r->builder, NULL, NULL);
753 }
754
755 static void
756 gen7_rectlist_wm(struct ilo_render *r,
757 const struct ilo_blitter *blitter)
758 {
759 uint32_t hiz_op;
760
761 switch (blitter->op) {
762 case ILO_BLITTER_RECTLIST_CLEAR_ZS:
763 hiz_op = GEN7_WM_DW1_DEPTH_CLEAR;
764 break;
765 case ILO_BLITTER_RECTLIST_RESOLVE_Z:
766 hiz_op = GEN7_WM_DW1_DEPTH_RESOLVE;
767 break;
768 case ILO_BLITTER_RECTLIST_RESOLVE_HIZ:
769 hiz_op = GEN7_WM_DW1_HIZ_RESOLVE;
770 break;
771 default:
772 hiz_op = 0;
773 break;
774 }
775
776 gen7_3DSTATE_WM(r->builder, NULL, NULL, false, hiz_op);
777
778 gen7_3DSTATE_CONSTANT_PS(r->builder, NULL, NULL, 0);
779
780 gen7_wa_pre_3dstate_ps_max_threads(r);
781 gen7_3DSTATE_PS(r->builder, NULL, 0, false);
782 }
783
784 static void
785 gen7_rectlist_wm_depth(struct ilo_render *r,
786 const struct ilo_blitter *blitter)
787 {
788 gen7_wa_pre_depth(r);
789
790 if (blitter->uses & (ILO_BLITTER_USE_FB_DEPTH |
791 ILO_BLITTER_USE_FB_STENCIL)) {
792 gen6_3DSTATE_DEPTH_BUFFER(r->builder,
793 &blitter->fb.dst.u.zs);
794 }
795
796 if (blitter->uses & ILO_BLITTER_USE_FB_DEPTH) {
797 gen6_3DSTATE_HIER_DEPTH_BUFFER(r->builder,
798 &blitter->fb.dst.u.zs);
799 }
800
801 if (blitter->uses & ILO_BLITTER_USE_FB_STENCIL) {
802 gen6_3DSTATE_STENCIL_BUFFER(r->builder,
803 &blitter->fb.dst.u.zs);
804 }
805
806 gen7_3DSTATE_CLEAR_PARAMS(r->builder,
807 blitter->depth_clear_value);
808 }
809
810 static void
811 gen7_rectlist_wm_multisample(struct ilo_render *r,
812 const struct ilo_blitter *blitter)
813 {
814 const uint32_t *packed_sample_pos =
815 (blitter->fb.num_samples > 4) ? r->packed_sample_position_8x :
816 (blitter->fb.num_samples > 1) ? &r->packed_sample_position_4x :
817 &r->packed_sample_position_1x;
818
819 gen7_wa_pre_3dstate_multisample(r);
820
821 gen6_3DSTATE_MULTISAMPLE(r->builder, blitter->fb.num_samples,
822 packed_sample_pos, true);
823
824 gen7_3DSTATE_SAMPLE_MASK(r->builder,
825 (1 << blitter->fb.num_samples) - 1, blitter->fb.num_samples);
826 }
827
828 void
829 ilo_render_emit_rectlist_commands_gen7(struct ilo_render *r,
830 const struct ilo_blitter *blitter)
831 {
832 ILO_DEV_ASSERT(r->dev, 7, 7.5);
833
834 gen7_rectlist_wm_multisample(r, blitter);
835
836 gen6_state_base_address(r->builder, true);
837
838 gen6_3DSTATE_VERTEX_BUFFERS(r->builder,
839 &blitter->ve, &blitter->vb);
840
841 gen6_3DSTATE_VERTEX_ELEMENTS(r->builder,
842 &blitter->ve, false, false);
843
844 gen7_rectlist_pcb_alloc(r, blitter);
845
846 /* needed for any VS-related commands */
847 gen7_wa_pre_vs(r);
848
849 gen7_rectlist_urb(r, blitter);
850
851 if (blitter->uses & ILO_BLITTER_USE_DSA) {
852 gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(r->builder,
853 r->state.DEPTH_STENCIL_STATE);
854 }
855
856 if (blitter->uses & ILO_BLITTER_USE_CC) {
857 gen7_3DSTATE_CC_STATE_POINTERS(r->builder,
858 r->state.COLOR_CALC_STATE);
859 }
860
861 gen7_rectlist_vs_to_sf(r, blitter);
862 gen7_rectlist_wm(r, blitter);
863
864 if (blitter->uses & ILO_BLITTER_USE_VIEWPORT) {
865 gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(r->builder,
866 r->state.CC_VIEWPORT);
867 }
868
869 gen7_rectlist_wm_depth(r, blitter);
870
871 gen6_3DSTATE_DRAWING_RECTANGLE(r->builder, 0, 0,
872 blitter->fb.width, blitter->fb.height);
873
874 gen7_3DPRIMITIVE(r->builder, &blitter->draw, NULL);
875 }
876
877 int
878 ilo_render_get_draw_commands_len_gen7(const struct ilo_render *render,
879 const struct ilo_state_vector *vec)
880 {
881 static int len;
882
883 ILO_DEV_ASSERT(render->dev, 7, 7.5);
884
885 if (!len) {
886 len += GEN7_3DSTATE_URB_ANY__SIZE * 4;
887 len += GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_ANY__SIZE * 5;
888 len += GEN6_3DSTATE_CONSTANT_ANY__SIZE * 5;
889 len += GEN7_3DSTATE_POINTERS_ANY__SIZE * (5 + 5 + 4);
890 len += GEN7_3DSTATE_SO_BUFFER__SIZE * 4;
891 len += GEN6_PIPE_CONTROL__SIZE * 5;
892
893 len +=
894 GEN6_STATE_BASE_ADDRESS__SIZE +
895 GEN6_STATE_SIP__SIZE +
896 GEN6_3DSTATE_VF_STATISTICS__SIZE +
897 GEN6_PIPELINE_SELECT__SIZE +
898 GEN6_3DSTATE_CLEAR_PARAMS__SIZE +
899 GEN6_3DSTATE_DEPTH_BUFFER__SIZE +
900 GEN6_3DSTATE_STENCIL_BUFFER__SIZE +
901 GEN6_3DSTATE_HIER_DEPTH_BUFFER__SIZE +
902 GEN6_3DSTATE_VERTEX_BUFFERS__SIZE +
903 GEN6_3DSTATE_VERTEX_ELEMENTS__SIZE +
904 GEN6_3DSTATE_INDEX_BUFFER__SIZE +
905 GEN75_3DSTATE_VF__SIZE +
906 GEN6_3DSTATE_VS__SIZE +
907 GEN6_3DSTATE_GS__SIZE +
908 GEN6_3DSTATE_CLIP__SIZE +
909 GEN6_3DSTATE_SF__SIZE +
910 GEN6_3DSTATE_WM__SIZE +
911 GEN6_3DSTATE_SAMPLE_MASK__SIZE +
912 GEN7_3DSTATE_HS__SIZE +
913 GEN7_3DSTATE_TE__SIZE +
914 GEN7_3DSTATE_DS__SIZE +
915 GEN7_3DSTATE_STREAMOUT__SIZE +
916 GEN7_3DSTATE_SBE__SIZE +
917 GEN7_3DSTATE_PS__SIZE +
918 GEN6_3DSTATE_DRAWING_RECTANGLE__SIZE +
919 GEN6_3DSTATE_POLY_STIPPLE_OFFSET__SIZE +
920 GEN6_3DSTATE_POLY_STIPPLE_PATTERN__SIZE +
921 GEN6_3DSTATE_LINE_STIPPLE__SIZE +
922 GEN6_3DSTATE_AA_LINE_PARAMETERS__SIZE +
923 GEN6_3DSTATE_MULTISAMPLE__SIZE +
924 GEN7_3DSTATE_SO_DECL_LIST__SIZE +
925 GEN6_3DPRIMITIVE__SIZE;
926 }
927
928 return len;
929 }