i965: Handle mix-and-match TCS/TES with separate shader objects.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
/**
 * Render-pipeline state atoms used when brw_init_state() takes its final
 * fallback branch (brw->gen is neither 6, 7, nor >= 8).
 *
 * brw_upload_pipeline_state() walks the copied list front to back, so the
 * order of the entries below is significant; the inline comments record
 * the known ordering constraints.
 */
static const struct brw_tracked_state *gen4_atoms[] =
{
   &brw_interpolation_map,

   &brw_clip_prog, /* must do before state base address */
   &brw_sf_prog, /* must do before state base address */

   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit, /* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,
   &brw_state_base_address,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};
109
/**
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 6.
 *
 * Entries are emitted in array order by brw_upload_pipeline_state(), so
 * the ordering notes below must be respected when adding or moving atoms.
 */
static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_clip_vp,
   &gen6_sf_vp,

   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen6_viewport_state, /* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};
178
/**
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 7.
 *
 * Entries are emitted in array order by brw_upload_pipeline_state(); the
 * inline comments document the ordering requirements between atoms.
 */
static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_vs_state,
   &gen7_hs_state,
   &gen7_te_state,
   &gen7_ds_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen7_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};
259
/**
 * Compute-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 7.  Emitted in array order by brw_upload_pipeline_state().
 */
static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &brw_state_base_address,
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_state,
};
273
/**
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen >= 8.
 *
 * Entries are emitted in array order by brw_upload_pipeline_state(); the
 * inline comments document the ordering requirements between atoms.
 * Unlike the gen7 list, this one also carries the tessellation control
 * (tcs) and tessellation evaluation (tes) atoms.
 */
static const struct brw_tracked_state *gen8_render_atoms[] =
{
   /* Command packets: */
   &gen8_state_base_address,

   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_disable_stages,
   &gen8_vs_state,
   &gen8_hs_state,
   &gen7_te_state,
   &gen8_ds_state,
   &gen8_gs_state,
   &gen8_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};
373
/**
 * Compute-pipeline state atoms installed by brw_init_state() when
 * brw->gen >= 8.  Emitted in array order by brw_upload_pipeline_state().
 * Same sequence as gen7_compute_atoms except for the gen8 state base
 * address atom.
 */
static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen8_state_base_address,
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_state,
};
387
388 static void
389 brw_upload_initial_gpu_state(struct brw_context *brw)
390 {
391 /* On platforms with hardware contexts, we can set our initial GPU state
392 * right away rather than doing it via state atoms. This saves a small
393 * amount of overhead on every draw call.
394 */
395 if (!brw->hw_ctx)
396 return;
397
398 if (brw->gen == 6)
399 brw_emit_post_sync_nonzero_flush(brw);
400
401 brw_upload_invariant_state(brw);
402
403 /* Recommended optimization for Victim Cache eviction in pixel backend. */
404 if (brw->gen >= 9) {
405 BEGIN_BATCH(3);
406 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
407 OUT_BATCH(GEN7_CACHE_MODE_1);
408 OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
409 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
410 ADVANCE_BATCH();
411 }
412
413 if (brw->gen >= 8) {
414 gen8_emit_3dstate_sample_pattern(brw);
415 }
416 }
417
418 static inline const struct brw_tracked_state *
419 brw_get_pipeline_atoms(struct brw_context *brw,
420 enum brw_pipeline pipeline)
421 {
422 switch (pipeline) {
423 case BRW_RENDER_PIPELINE:
424 return brw->render_atoms;
425 case BRW_COMPUTE_PIPELINE:
426 return brw->compute_atoms;
427 default:
428 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
429 unreachable("Unsupported pipeline");
430 return NULL;
431 }
432 }
433
434 static void
435 brw_copy_pipeline_atoms(struct brw_context *brw,
436 enum brw_pipeline pipeline,
437 const struct brw_tracked_state **atoms,
438 int num_atoms)
439 {
440 /* This is to work around brw_context::atoms being declared const. We want
441 * it to be const, but it needs to be initialized somehow!
442 */
443 struct brw_tracked_state *context_atoms =
444 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
445
446 for (int i = 0; i < num_atoms; i++) {
447 context_atoms[i] = *atoms[i];
448 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
449 assert(context_atoms[i].emit);
450 }
451
452 brw->num_atoms[pipeline] = num_atoms;
453 }
454
455 void brw_init_state( struct brw_context *brw )
456 {
457 struct gl_context *ctx = &brw->ctx;
458
459 /* Force the first brw_select_pipeline to emit pipeline select */
460 brw->last_pipeline = BRW_NUM_PIPELINES;
461
462 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
463 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
464 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
465 ARRAY_SIZE(brw->render_atoms));
466 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
467 ARRAY_SIZE(brw->render_atoms));
468 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
469 ARRAY_SIZE(brw->compute_atoms));
470 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
471 ARRAY_SIZE(brw->compute_atoms));
472
473 brw_init_caches(brw);
474
475 if (brw->gen >= 8) {
476 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
477 gen8_render_atoms,
478 ARRAY_SIZE(gen8_render_atoms));
479 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
480 gen8_compute_atoms,
481 ARRAY_SIZE(gen8_compute_atoms));
482 } else if (brw->gen == 7) {
483 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
484 gen7_render_atoms,
485 ARRAY_SIZE(gen7_render_atoms));
486 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
487 gen7_compute_atoms,
488 ARRAY_SIZE(gen7_compute_atoms));
489 } else if (brw->gen == 6) {
490 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
491 gen6_atoms, ARRAY_SIZE(gen6_atoms));
492 } else {
493 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
494 gen4_atoms, ARRAY_SIZE(gen4_atoms));
495 }
496
497 brw_upload_initial_gpu_state(brw);
498
499 brw->NewGLState = ~0;
500 brw->ctx.NewDriverState = ~0ull;
501
502 /* ~0 is a nonsensical value which won't match anything we program, so
503 * the programming will take effect on the first time around.
504 */
505 brw->pma_stall_bits = ~0;
506
507 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
508 * dirty flags.
509 */
510 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
511
512 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
513 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
514 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
515 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
516 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
517 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
518 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
519 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
520 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
521 }
522
523
/**
 * Release the state-upload resources of a context; currently this only
 * destroys the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
528
529 /***********************************************************************
530 */
531
532 static bool
533 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
534 {
535 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
536 }
537
538 static void accumulate_state( struct brw_state_flags *a,
539 const struct brw_state_flags *b )
540 {
541 a->mesa |= b->mesa;
542 a->brw |= b->brw;
543 }
544
545
546 static void xor_states( struct brw_state_flags *result,
547 const struct brw_state_flags *a,
548 const struct brw_state_flags *b )
549 {
550 result->mesa = a->mesa ^ b->mesa;
551 result->brw = a->brw ^ b->brw;
552 }
553
/**
 * One entry of a dirty-flag statistics table.
 *
 * A table is an array of these terminated by an entry whose \c bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;       /**< flag value (zero marks the end of a table) */
   const char *name;   /**< flag name; always a string literal, hence const */
   uint32_t count;     /**< number of uploads in which the flag was set */
};

/** Build a table entry for \p name with its count starting at zero. */
#define DEFINE_BIT(name) {name, #name, 0}
561
/**
 * Statistics table for the core Mesa _NEW_* dirty flags.
 *
 * brw_upload_pipeline_state() bumps the counters from state.mesa and
 * periodically prints them when INTEL_DEBUG & DEBUG_STATE is set.  The
 * all-zero entry terminates the table.
 */
static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems: the cast to unsigned happens before
    * the value is widened to the 64-bit "bit" field.
    */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};
596
/**
 * Statistics table for the driver's BRW_NEW_* dirty flags.
 *
 * brw_upload_pipeline_state() bumps the counters from state.brw and
 * periodically prints them when INTEL_DEBUG & DEBUG_STATE is set.  A
 * STATIC_ASSERT there requires this table to hold exactly
 * BRW_NUM_STATE_BITS entries plus the all-zero terminator, so a new
 * BRW_NEW_* flag needs a matching line here.
 */
static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   {0, 0, 0}
};
655
656 static void
657 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
658 {
659 for (int i = 0; bit_map[i].bit != 0; i++) {
660 if (bit_map[i].bit & bits)
661 bit_map[i].count++;
662 }
663 }
664
665 static void
666 brw_print_dirty_count(struct dirty_bit_map *bit_map)
667 {
668 for (int i = 0; bit_map[i].bit != 0; i++) {
669 if (bit_map[i].count > 1) {
670 fprintf(stderr, "0x%016lx: %12d (%s)\n",
671 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
672 }
673 }
674 }
675
676 static inline void
677 brw_upload_tess_programs(struct brw_context *brw)
678 {
679 if (brw->tess_eval_program) {
680 uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
681 uint32_t per_patch_slots =
682 brw->tess_eval_program->Base.PatchInputsRead;
683
684 /* The TCS may have additional outputs which aren't read by the
685 * TES (possibly for cross-thread communication). These need to
686 * be stored in the Patch URB Entry as well.
687 */
688 if (brw->tess_ctrl_program) {
689 per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
690 per_patch_slots |=
691 brw->tess_ctrl_program->Base.PatchOutputsWritten;
692 }
693
694 brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
695 brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
696 } else {
697 brw->tcs.prog_data = NULL;
698 brw->tcs.base.prog_data = NULL;
699 brw->tes.prog_data = NULL;
700 brw->tes.base.prog_data = NULL;
701 }
702 }
703
704 static inline void
705 brw_upload_programs(struct brw_context *brw,
706 enum brw_pipeline pipeline)
707 {
708 if (pipeline == BRW_RENDER_PIPELINE) {
709 brw_upload_vs_prog(brw);
710 brw_upload_tess_programs(brw);
711
712 if (brw->gen < 6)
713 brw_upload_ff_gs_prog(brw);
714 else
715 brw_upload_gs_prog(brw);
716
717 /* Update the VUE map for data exiting the GS stage of the pipeline.
718 * This comes from the last enabled shader stage.
719 */
720 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
721 bool old_separate = brw->vue_map_geom_out.separate;
722 if (brw->geometry_program)
723 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
724 else if (brw->tess_eval_program)
725 brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
726 else
727 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
728
729 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
730 if (old_slots != brw->vue_map_geom_out.slots_valid ||
731 old_separate != brw->vue_map_geom_out.separate)
732 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
733
734 brw_upload_wm_prog(brw);
735 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
736 brw_upload_cs_prog(brw);
737 }
738 }
739
740 static inline void
741 merge_ctx_state(struct brw_context *brw,
742 struct brw_state_flags *state)
743 {
744 state->mesa |= brw->NewGLState;
745 state->brw |= brw->ctx.NewDriverState;
746 }
747
748 static inline void
749 check_and_emit_atom(struct brw_context *brw,
750 struct brw_state_flags *state,
751 const struct brw_tracked_state *atom)
752 {
753 if (check_state(state, &atom->dirty)) {
754 atom->emit(brw);
755 merge_ctx_state(brw, state);
756 }
757 }
758
/**
 * Upload all dirty state for one pipeline.
 *
 * The sequence is:
 *  1. select the pipeline;
 *  2. compare the driver's cached program pointers, meta-in-progress flag
 *     and sample count against the GL context and raise the matching
 *     BRW_NEW_* flag for each one that changed;
 *  3. return early if no dirty flag is set at all;
 *  4. upload the shader programs;
 *  5. walk the pipeline's atoms in order, emitting each atom whose dirty
 *     mask intersects the accumulated dirty flags.
 *
 * The dirty flags themselves are not cleared here; that is done by
 * brw_pipeline_state_finished().
 *
 * \param brw       driver context
 * \param pipeline  pipeline whose state is uploaded
 */
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   /* Number of uploads seen so far; only used to pace the debug dump. */
   static int dirty_count = 0;
   /* Start from the flags saved for this pipeline while another pipeline
    * was being uploaded.
    */
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   /* Track changes of the currently bound programs.  The graphics stages
    * are only examined for the render pipeline.
    */
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   /* The compute program is checked for both pipelines. */
   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   /* Uploading programs may raise further dirty flags; merge them in
    * before walking the atoms.
    */
   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         /* "examined" is the union of the dirty masks of every atom
          * visited so far, including the current one.
          */
         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          *
          * i.e. an atom must not raise a flag that it, or an atom before
          * it in the list, depends on.
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      /* brw_bits must list every BRW_NEW_* flag plus its terminator. */
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      /* Dump the statistics on the first upload and every 1000th after. */
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}
881
882 /***********************************************************************
883 * Emit all state:
884 */
885 void brw_upload_render_state(struct brw_context *brw)
886 {
887 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
888 }
889
890 static inline void
891 brw_pipeline_state_finished(struct brw_context *brw,
892 enum brw_pipeline pipeline)
893 {
894 /* Save all dirty state into the other pipelines */
895 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
896 if (i != pipeline) {
897 brw->state.pipelines[i].mesa |= brw->NewGLState;
898 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
899 } else {
900 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
901 }
902 }
903
904 brw->NewGLState = 0;
905 brw->ctx.NewDriverState = 0ull;
906 }
907
908 /**
909 * Clear dirty bits to account for the fact that the state emitted by
910 * brw_upload_render_state() has been committed to the hardware. This is a
911 * separate call from brw_upload_render_state() because it's possible that
912 * after the call to brw_upload_render_state(), we will discover that we've
913 * run out of aperture space, and need to rewind the batch buffer to the state
914 * it had before the brw_upload_render_state() call.
915 */
916 void
917 brw_render_state_finished(struct brw_context *brw)
918 {
919 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
920 }
921
922 void
923 brw_upload_compute_state(struct brw_context *brw)
924 {
925 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
926 }
927
928 void
929 brw_compute_state_finished(struct brw_context *brw)
930 {
931 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
932 }