i965: Define state flag to signal that the URB size has been altered.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_push_constant_space,
190 &gen7_urb,
191 &gen6_blend_state, /* must do before cc unit */
192 &gen6_color_calc_state, /* must do before cc unit */
193 &gen6_depth_stencil_state, /* must do before cc unit */
194
195 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
196
197 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
198 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
199 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
200
201 &gen6_vs_push_constants, /* Before vs_state */
202 &gen6_gs_push_constants, /* Before gs_state */
203 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
204
205 /* Surface state setup. Must come before the VS/WM unit. The binding
206 * table upload must be last.
207 */
208 &brw_vs_pull_constants,
209 &brw_vs_ubo_surfaces,
210 &brw_vs_abo_surfaces,
211 &brw_gs_pull_constants,
212 &brw_gs_ubo_surfaces,
213 &brw_gs_abo_surfaces,
214 &brw_wm_pull_constants,
215 &brw_wm_ubo_surfaces,
216 &brw_wm_abo_surfaces,
217 &gen6_renderbuffer_surfaces,
218 &brw_texture_surfaces,
219 &brw_vs_binding_table,
220 &brw_gs_binding_table,
221 &brw_wm_binding_table,
222
223 &brw_fs_samplers,
224 &brw_vs_samplers,
225 &brw_gs_samplers,
226 &gen6_multisample_state,
227
228 &gen7_vs_state,
229 &gen7_hs_state,
230 &gen7_te_state,
231 &gen7_ds_state,
232 &gen7_gs_state,
233 &gen7_sol_state,
234 &gen7_clip_state,
235 &gen7_sbe_state,
236 &gen7_sf_state,
237 &gen7_wm_state,
238 &gen7_ps_state,
239
240 &gen6_scissor_state,
241
242 &gen7_depthbuffer,
243
244 &brw_polygon_stipple,
245 &brw_polygon_stipple_offset,
246
247 &brw_line_stipple,
248 &brw_aa_line_parameters,
249
250 &brw_drawing_rect,
251
252 &brw_indices, /* must come before brw_vertices */
253 &brw_index_buffer,
254 &brw_vertices,
255
256 &haswell_cut_index,
257 };
258
259 static const struct brw_tracked_state *gen7_compute_atoms[] =
260 {
261 &brw_state_base_address,
262 &brw_cs_image_surfaces,
263 &gen7_cs_push_constants,
264 &brw_cs_pull_constants,
265 &brw_cs_ubo_surfaces,
266 &brw_cs_abo_surfaces,
267 &brw_texture_surfaces,
268 &brw_cs_work_groups_surface,
269 &brw_cs_state,
270 };
271
272 static const struct brw_tracked_state *gen8_render_atoms[] =
273 {
274 /* Command packets: */
275 &gen8_state_base_address,
276
277 &brw_cc_vp,
278 &gen8_sf_clip_viewport,
279
280 &gen7_push_constant_space,
281 &gen7_urb,
282 &gen8_blend_state,
283 &gen6_color_calc_state,
284
285 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
286
287 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
288 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
289 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
290
291 &gen6_vs_push_constants, /* Before vs_state */
292 &gen6_gs_push_constants, /* Before gs_state */
293 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
294
295 /* Surface state setup. Must come before the VS/WM unit. The binding
296 * table upload must be last.
297 */
298 &brw_vs_pull_constants,
299 &brw_vs_ubo_surfaces,
300 &brw_vs_abo_surfaces,
301 &brw_gs_pull_constants,
302 &brw_gs_ubo_surfaces,
303 &brw_gs_abo_surfaces,
304 &brw_wm_pull_constants,
305 &brw_wm_ubo_surfaces,
306 &brw_wm_abo_surfaces,
307 &gen6_renderbuffer_surfaces,
308 &brw_texture_surfaces,
309 &brw_vs_binding_table,
310 &brw_gs_binding_table,
311 &brw_wm_binding_table,
312
313 &brw_fs_samplers,
314 &brw_vs_samplers,
315 &brw_gs_samplers,
316 &gen8_multisample_state,
317
318 &gen8_disable_stages,
319 &gen8_vs_state,
320 &gen8_hs_state,
321 &gen7_te_state,
322 &gen8_ds_state,
323 &gen8_gs_state,
324 &gen8_sol_state,
325 &gen6_clip_state,
326 &gen8_raster_state,
327 &gen8_sbe_state,
328 &gen8_sf_state,
329 &gen8_ps_blend,
330 &gen8_ps_extra,
331 &gen8_ps_state,
332 &gen8_wm_depth_stencil,
333 &gen8_wm_state,
334
335 &gen6_scissor_state,
336
337 &gen7_depthbuffer,
338
339 &brw_polygon_stipple,
340 &brw_polygon_stipple_offset,
341
342 &brw_line_stipple,
343 &brw_aa_line_parameters,
344
345 &brw_drawing_rect,
346
347 &gen8_vf_topology,
348
349 &brw_indices,
350 &gen8_index_buffer,
351 &gen8_vertices,
352
353 &haswell_cut_index,
354 &gen8_pma_fix,
355 };
356
357 static const struct brw_tracked_state *gen8_compute_atoms[] =
358 {
359 &gen8_state_base_address,
360 &brw_cs_image_surfaces,
361 &gen7_cs_push_constants,
362 &brw_cs_pull_constants,
363 &brw_cs_ubo_surfaces,
364 &brw_cs_abo_surfaces,
365 &brw_texture_surfaces,
366 &brw_cs_work_groups_surface,
367 &brw_cs_state,
368 };
369
370 static void
371 brw_upload_initial_gpu_state(struct brw_context *brw)
372 {
373 /* On platforms with hardware contexts, we can set our initial GPU state
374 * right away rather than doing it via state atoms. This saves a small
375 * amount of overhead on every draw call.
376 */
377 if (!brw->hw_ctx)
378 return;
379
380 if (brw->gen == 6)
381 brw_emit_post_sync_nonzero_flush(brw);
382
383 brw_upload_invariant_state(brw);
384
385 /* Recommended optimization for Victim Cache eviction in pixel backend. */
386 if (brw->gen >= 9) {
387 BEGIN_BATCH(3);
388 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
389 OUT_BATCH(GEN7_CACHE_MODE_1);
390 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
391 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
392 ADVANCE_BATCH();
393 }
394
395 if (brw->gen >= 8) {
396 gen8_emit_3dstate_sample_pattern(brw);
397 }
398 }
399
400 static inline const struct brw_tracked_state *
401 brw_get_pipeline_atoms(struct brw_context *brw,
402 enum brw_pipeline pipeline)
403 {
404 switch (pipeline) {
405 case BRW_RENDER_PIPELINE:
406 return brw->render_atoms;
407 case BRW_COMPUTE_PIPELINE:
408 return brw->compute_atoms;
409 default:
410 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
411 unreachable("Unsupported pipeline");
412 return NULL;
413 }
414 }
415
416 static void
417 brw_copy_pipeline_atoms(struct brw_context *brw,
418 enum brw_pipeline pipeline,
419 const struct brw_tracked_state **atoms,
420 int num_atoms)
421 {
422 /* This is to work around brw_context::atoms being declared const. We want
423 * it to be const, but it needs to be initialized somehow!
424 */
425 struct brw_tracked_state *context_atoms =
426 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
427
428 for (int i = 0; i < num_atoms; i++) {
429 context_atoms[i] = *atoms[i];
430 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
431 assert(context_atoms[i].emit);
432 }
433
434 brw->num_atoms[pipeline] = num_atoms;
435 }
436
437 void brw_init_state( struct brw_context *brw )
438 {
439 struct gl_context *ctx = &brw->ctx;
440
441 /* Force the first brw_select_pipeline to emit pipeline select */
442 brw->last_pipeline = BRW_NUM_PIPELINES;
443
444 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
445 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
446 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
447 ARRAY_SIZE(brw->render_atoms));
448 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
449 ARRAY_SIZE(brw->render_atoms));
450 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
451 ARRAY_SIZE(brw->compute_atoms));
452 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
453 ARRAY_SIZE(brw->compute_atoms));
454
455 brw_init_caches(brw);
456
457 if (brw->gen >= 8) {
458 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
459 gen8_render_atoms,
460 ARRAY_SIZE(gen8_render_atoms));
461 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
462 gen8_compute_atoms,
463 ARRAY_SIZE(gen8_compute_atoms));
464 } else if (brw->gen == 7) {
465 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
466 gen7_render_atoms,
467 ARRAY_SIZE(gen7_render_atoms));
468 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
469 gen7_compute_atoms,
470 ARRAY_SIZE(gen7_compute_atoms));
471 } else if (brw->gen == 6) {
472 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
473 gen6_atoms, ARRAY_SIZE(gen6_atoms));
474 } else {
475 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
476 gen4_atoms, ARRAY_SIZE(gen4_atoms));
477 }
478
479 brw_upload_initial_gpu_state(brw);
480
481 brw->NewGLState = ~0;
482 brw->ctx.NewDriverState = ~0ull;
483
484 /* ~0 is a nonsensical value which won't match anything we program, so
485 * the programming will take effect on the first time around.
486 */
487 brw->pma_stall_bits = ~0;
488
489 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
490 * dirty flags.
491 */
492 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
493
494 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
495 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
496 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
497 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
498 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
499 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
500 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
501 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
502 }
503
504
/**
 * Tear down the state set up by brw_init_state(); at present that means
 * destroying the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
509
510 /***********************************************************************
511 */
512
513 static bool
514 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
515 {
516 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
517 }
518
519 static void accumulate_state( struct brw_state_flags *a,
520 const struct brw_state_flags *b )
521 {
522 a->mesa |= b->mesa;
523 a->brw |= b->brw;
524 }
525
526
527 static void xor_states( struct brw_state_flags *result,
528 const struct brw_state_flags *a,
529 const struct brw_state_flags *b )
530 {
531 result->mesa = a->mesa ^ b->mesa;
532 result->brw = a->brw ^ b->brw;
533 }
534
/**
 * One entry of a dirty-bit statistics table.
 *
 * Tables of these entries are terminated by an entry whose bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;       /* flag value this entry tracks */
   const char *name;   /* flag name; always a string literal, hence const */
   uint32_t count;     /* how many times the flag was seen dirty */
};
540
/* Build one dirty_bit_map initializer from a flag: its value, its spelling
 * as a string, and a zeroed counter.
 */
#define DEFINE_BIT(flag) { (flag), #flag, 0 }
542
543 static struct dirty_bit_map mesa_bits[] = {
544 DEFINE_BIT(_NEW_MODELVIEW),
545 DEFINE_BIT(_NEW_PROJECTION),
546 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
547 DEFINE_BIT(_NEW_COLOR),
548 DEFINE_BIT(_NEW_DEPTH),
549 DEFINE_BIT(_NEW_EVAL),
550 DEFINE_BIT(_NEW_FOG),
551 DEFINE_BIT(_NEW_HINT),
552 DEFINE_BIT(_NEW_LIGHT),
553 DEFINE_BIT(_NEW_LINE),
554 DEFINE_BIT(_NEW_PIXEL),
555 DEFINE_BIT(_NEW_POINT),
556 DEFINE_BIT(_NEW_POLYGON),
557 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
558 DEFINE_BIT(_NEW_SCISSOR),
559 DEFINE_BIT(_NEW_STENCIL),
560 DEFINE_BIT(_NEW_TEXTURE),
561 DEFINE_BIT(_NEW_TRANSFORM),
562 DEFINE_BIT(_NEW_VIEWPORT),
563 DEFINE_BIT(_NEW_ARRAY),
564 DEFINE_BIT(_NEW_RENDERMODE),
565 DEFINE_BIT(_NEW_BUFFERS),
566 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
567 DEFINE_BIT(_NEW_MULTISAMPLE),
568 DEFINE_BIT(_NEW_TRACK_MATRIX),
569 DEFINE_BIT(_NEW_PROGRAM),
570 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
571 DEFINE_BIT(_NEW_BUFFER_OBJECT),
572 DEFINE_BIT(_NEW_FRAG_CLAMP),
573 /* Avoid sign extension problems. */
574 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
575 {0, 0, 0}
576 };
577
578 static struct dirty_bit_map brw_bits[] = {
579 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
580 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
581 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
582 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
583 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
584 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
585 DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
586 DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
587 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
588 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
589 DEFINE_BIT(BRW_NEW_URB_FENCE),
590 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
591 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
592 DEFINE_BIT(BRW_NEW_TESS_EVAL_PROGRAM),
593 DEFINE_BIT(BRW_NEW_TESS_CTRL_PROGRAM),
594 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
595 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
596 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
597 DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
598 DEFINE_BIT(BRW_NEW_PRIMITIVE),
599 DEFINE_BIT(BRW_NEW_CONTEXT),
600 DEFINE_BIT(BRW_NEW_PSP),
601 DEFINE_BIT(BRW_NEW_SURFACES),
602 DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
603 DEFINE_BIT(BRW_NEW_INDICES),
604 DEFINE_BIT(BRW_NEW_VERTICES),
605 DEFINE_BIT(BRW_NEW_BATCH),
606 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
607 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
608 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
609 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
610 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
611 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
612 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
613 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
614 DEFINE_BIT(BRW_NEW_STATS_WM),
615 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
616 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
617 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
618 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
619 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
620 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
621 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
622 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
623 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
624 DEFINE_BIT(BRW_NEW_CC_VP),
625 DEFINE_BIT(BRW_NEW_SF_VP),
626 DEFINE_BIT(BRW_NEW_CLIP_VP),
627 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
628 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
629 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
630 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
631 DEFINE_BIT(BRW_NEW_URB_SIZE),
632 {0, 0, 0}
633 };
634
635 static void
636 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
637 {
638 for (int i = 0; bit_map[i].bit != 0; i++) {
639 if (bit_map[i].bit & bits)
640 bit_map[i].count++;
641 }
642 }
643
644 static void
645 brw_print_dirty_count(struct dirty_bit_map *bit_map)
646 {
647 for (int i = 0; bit_map[i].bit != 0; i++) {
648 if (bit_map[i].count > 1) {
649 fprintf(stderr, "0x%016lx: %12d (%s)\n",
650 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
651 }
652 }
653 }
654
655 static inline void
656 brw_upload_programs(struct brw_context *brw,
657 enum brw_pipeline pipeline)
658 {
659 if (pipeline == BRW_RENDER_PIPELINE) {
660 brw_upload_vs_prog(brw);
661
662 if (brw->gen < 6)
663 brw_upload_ff_gs_prog(brw);
664 else
665 brw_upload_gs_prog(brw);
666
667 /* Update the VUE map for data exiting the GS stage of the pipeline.
668 * This comes from the last enabled shader stage.
669 */
670 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
671 bool old_separate = brw->vue_map_geom_out.separate;
672 if (brw->geometry_program)
673 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
674 else
675 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
676
677 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
678 if (old_slots != brw->vue_map_geom_out.slots_valid ||
679 old_separate != brw->vue_map_geom_out.separate)
680 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
681
682 brw_upload_wm_prog(brw);
683 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
684 brw_upload_cs_prog(brw);
685 }
686 }
687
688 static inline void
689 merge_ctx_state(struct brw_context *brw,
690 struct brw_state_flags *state)
691 {
692 state->mesa |= brw->NewGLState;
693 state->brw |= brw->ctx.NewDriverState;
694 }
695
696 static inline void
697 check_and_emit_atom(struct brw_context *brw,
698 struct brw_state_flags *state,
699 const struct brw_tracked_state *atom)
700 {
701 if (check_state(state, &atom->dirty)) {
702 atom->emit(brw);
703 merge_ctx_state(brw, state);
704 }
705 }
706
707 static inline void
708 brw_upload_pipeline_state(struct brw_context *brw,
709 enum brw_pipeline pipeline)
710 {
711 struct gl_context *ctx = &brw->ctx;
712 int i;
713 static int dirty_count = 0;
714 struct brw_state_flags state = brw->state.pipelines[pipeline];
715 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
716
717 brw_select_pipeline(brw, pipeline);
718
719 if (0) {
720 /* Always re-emit all state. */
721 brw->NewGLState = ~0;
722 ctx->NewDriverState = ~0ull;
723 }
724
725 if (pipeline == BRW_RENDER_PIPELINE) {
726 if (brw->fragment_program != ctx->FragmentProgram._Current) {
727 brw->fragment_program = ctx->FragmentProgram._Current;
728 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
729 }
730
731 if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
732 brw->tess_eval_program = ctx->TessEvalProgram._Current;
733 brw->ctx.NewDriverState |= BRW_NEW_TESS_EVAL_PROGRAM;
734 }
735
736 if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
737 brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
738 brw->ctx.NewDriverState |= BRW_NEW_TESS_CTRL_PROGRAM;
739 }
740
741 if (brw->geometry_program != ctx->GeometryProgram._Current) {
742 brw->geometry_program = ctx->GeometryProgram._Current;
743 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
744 }
745
746 if (brw->vertex_program != ctx->VertexProgram._Current) {
747 brw->vertex_program = ctx->VertexProgram._Current;
748 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
749 }
750 }
751
752 if (brw->compute_program != ctx->ComputeProgram._Current) {
753 brw->compute_program = ctx->ComputeProgram._Current;
754 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
755 }
756
757 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
758 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
759 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
760 }
761
762 if (brw->num_samples != fb_samples) {
763 brw->num_samples = fb_samples;
764 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
765 }
766
767 /* Exit early if no state is flagged as dirty */
768 merge_ctx_state(brw, &state);
769 if ((state.mesa | state.brw) == 0)
770 return;
771
772 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
773 if (brw->gen == 6)
774 brw_emit_post_sync_nonzero_flush(brw);
775
776 brw_upload_programs(brw, pipeline);
777 merge_ctx_state(brw, &state);
778
779 const struct brw_tracked_state *atoms =
780 brw_get_pipeline_atoms(brw, pipeline);
781 const int num_atoms = brw->num_atoms[pipeline];
782
783 if (unlikely(INTEL_DEBUG)) {
784 /* Debug version which enforces various sanity checks on the
785 * state flags which are generated and checked to help ensure
786 * state atoms are ordered correctly in the list.
787 */
788 struct brw_state_flags examined, prev;
789 memset(&examined, 0, sizeof(examined));
790 prev = state;
791
792 for (i = 0; i < num_atoms; i++) {
793 const struct brw_tracked_state *atom = &atoms[i];
794 struct brw_state_flags generated;
795
796 check_and_emit_atom(brw, &state, atom);
797
798 accumulate_state(&examined, &atom->dirty);
799
800 /* generated = (prev ^ state)
801 * if (examined & generated)
802 * fail;
803 */
804 xor_states(&generated, &prev, &state);
805 assert(!check_state(&examined, &generated));
806 prev = state;
807 }
808 }
809 else {
810 for (i = 0; i < num_atoms; i++) {
811 const struct brw_tracked_state *atom = &atoms[i];
812
813 check_and_emit_atom(brw, &state, atom);
814 }
815 }
816
817 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
818 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
819
820 brw_update_dirty_count(mesa_bits, state.mesa);
821 brw_update_dirty_count(brw_bits, state.brw);
822 if (dirty_count++ % 1000 == 0) {
823 brw_print_dirty_count(mesa_bits);
824 brw_print_dirty_count(brw_bits);
825 fprintf(stderr, "\n");
826 }
827 }
828 }
829
830 /***********************************************************************
831 * Emit all state:
832 */
833 void brw_upload_render_state(struct brw_context *brw)
834 {
835 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
836 }
837
838 static inline void
839 brw_pipeline_state_finished(struct brw_context *brw,
840 enum brw_pipeline pipeline)
841 {
842 /* Save all dirty state into the other pipelines */
843 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
844 if (i != pipeline) {
845 brw->state.pipelines[i].mesa |= brw->NewGLState;
846 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
847 } else {
848 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
849 }
850 }
851
852 brw->NewGLState = 0;
853 brw->ctx.NewDriverState = 0ull;
854 }
855
856 /**
857 * Clear dirty bits to account for the fact that the state emitted by
858 * brw_upload_render_state() has been committed to the hardware. This is a
859 * separate call from brw_upload_render_state() because it's possible that
860 * after the call to brw_upload_render_state(), we will discover that we've
861 * run out of aperture space, and need to rewind the batch buffer to the state
862 * it had before the brw_upload_render_state() call.
863 */
864 void
865 brw_render_state_finished(struct brw_context *brw)
866 {
867 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
868 }
869
870 void
871 brw_upload_compute_state(struct brw_context *brw)
872 {
873 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
874 }
875
876 void
877 brw_compute_state_finished(struct brw_context *brw)
878 {
879 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
880 }