i965/state: Don't use brw->state.dirty.mesa
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43
44 static const struct brw_tracked_state *gen4_atoms[] =
45 {
46 &brw_interpolation_map,
47
48 &brw_clip_prog, /* must do before state base address */
49 &brw_sf_prog, /* must do before state base address */
50
51 /* Once all the programs are done, we know how large urb entry
52 * sizes need to be and can decide if we need to change the urb
53 * layout.
54 */
55 &brw_curbe_offsets,
56 &brw_recalculate_urb_fence,
57
58 &brw_cc_vp,
59 &brw_cc_unit,
60
61 /* Surface state setup. Must come before the VS/WM unit. The binding
62 * table upload must be last.
63 */
64 &brw_vs_pull_constants,
65 &brw_wm_pull_constants,
66 &brw_renderbuffer_surfaces,
67 &brw_texture_surfaces,
68 &brw_vs_binding_table,
69 &brw_wm_binding_table,
70
71 &brw_fs_samplers,
72 &brw_vs_samplers,
73
74 /* These set up state for brw_psp_urb_cbs */
75 &brw_wm_unit,
76 &brw_sf_vp,
77 &brw_sf_unit,
78 &brw_vs_unit, /* always required, enabled or not */
79 &brw_clip_unit,
80 &brw_gs_unit,
81
82 /* Command packets:
83 */
84 &brw_invariant_state,
85 &brw_state_base_address,
86
87 &brw_binding_table_pointers,
88 &brw_blend_constant_color,
89
90 &brw_depthbuffer,
91
92 &brw_polygon_stipple,
93 &brw_polygon_stipple_offset,
94
95 &brw_line_stipple,
96 &brw_aa_line_parameters,
97
98 &brw_psp_urb_cbs,
99
100 &brw_drawing_rect,
101 &brw_indices, /* must come before brw_vertices */
102 &brw_index_buffer,
103 &brw_vertices,
104
105 &brw_constant_buffer
106 };
107
108 static const struct brw_tracked_state *gen6_atoms[] =
109 {
110 &gen6_clip_vp,
111 &gen6_sf_vp,
112
113 /* Command packets: */
114
115 /* must do before binding table pointers, cc state ptrs */
116 &brw_state_base_address,
117
118 &brw_cc_vp,
119 &gen6_viewport_state, /* must do after *_vp stages */
120
121 &gen6_urb,
122 &gen6_blend_state, /* must do before cc unit */
123 &gen6_color_calc_state, /* must do before cc unit */
124 &gen6_depth_stencil_state, /* must do before cc unit */
125
126 &gen6_vs_push_constants, /* Before vs_state */
127 &gen6_gs_push_constants, /* Before gs_state */
128 &gen6_wm_push_constants, /* Before wm_state */
129
130 /* Surface state setup. Must come before the VS/WM unit. The binding
131 * table upload must be last.
132 */
133 &brw_vs_pull_constants,
134 &brw_vs_ubo_surfaces,
135 &brw_gs_pull_constants,
136 &brw_gs_ubo_surfaces,
137 &brw_wm_pull_constants,
138 &brw_wm_ubo_surfaces,
139 &gen6_renderbuffer_surfaces,
140 &brw_texture_surfaces,
141 &gen6_sol_surface,
142 &brw_vs_binding_table,
143 &gen6_gs_binding_table,
144 &brw_wm_binding_table,
145
146 &brw_fs_samplers,
147 &brw_vs_samplers,
148 &brw_gs_samplers,
149 &gen6_sampler_state,
150 &gen6_multisample_state,
151
152 &gen6_vs_state,
153 &gen6_gs_state,
154 &gen6_clip_state,
155 &gen6_sf_state,
156 &gen6_wm_state,
157
158 &gen6_scissor_state,
159
160 &gen6_binding_table_pointers,
161
162 &brw_depthbuffer,
163
164 &brw_polygon_stipple,
165 &brw_polygon_stipple_offset,
166
167 &brw_line_stipple,
168 &brw_aa_line_parameters,
169
170 &brw_drawing_rect,
171
172 &brw_indices, /* must come before brw_vertices */
173 &brw_index_buffer,
174 &brw_vertices,
175 };
176
177 static const struct brw_tracked_state *gen7_render_atoms[] =
178 {
179 /* Command packets: */
180
181 /* must do before binding table pointers, cc state ptrs */
182 &brw_state_base_address,
183
184 &brw_cc_vp,
185 &gen7_sf_clip_viewport,
186
187 &gen7_push_constant_space,
188 &gen7_urb,
189 &gen6_blend_state, /* must do before cc unit */
190 &gen6_color_calc_state, /* must do before cc unit */
191 &gen6_depth_stencil_state, /* must do before cc unit */
192
193 &gen6_vs_push_constants, /* Before vs_state */
194 &gen6_gs_push_constants, /* Before gs_state */
195 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
196
197 /* Surface state setup. Must come before the VS/WM unit. The binding
198 * table upload must be last.
199 */
200 &brw_vs_pull_constants,
201 &brw_vs_ubo_surfaces,
202 &brw_vs_abo_surfaces,
203 &brw_gs_pull_constants,
204 &brw_gs_ubo_surfaces,
205 &brw_gs_abo_surfaces,
206 &brw_wm_pull_constants,
207 &brw_wm_ubo_surfaces,
208 &brw_wm_abo_surfaces,
209 &gen6_renderbuffer_surfaces,
210 &brw_texture_surfaces,
211 &brw_vs_binding_table,
212 &brw_gs_binding_table,
213 &brw_wm_binding_table,
214
215 &brw_fs_samplers,
216 &brw_vs_samplers,
217 &brw_gs_samplers,
218 &gen6_multisample_state,
219
220 &gen7_disable_stages,
221 &gen7_vs_state,
222 &gen7_gs_state,
223 &gen7_sol_state,
224 &gen7_clip_state,
225 &gen7_sbe_state,
226 &gen7_sf_state,
227 &gen7_wm_state,
228 &gen7_ps_state,
229
230 &gen6_scissor_state,
231
232 &gen7_depthbuffer,
233
234 &brw_polygon_stipple,
235 &brw_polygon_stipple_offset,
236
237 &brw_line_stipple,
238 &brw_aa_line_parameters,
239
240 &brw_drawing_rect,
241
242 &brw_indices, /* must come before brw_vertices */
243 &brw_index_buffer,
244 &brw_vertices,
245
246 &haswell_cut_index,
247 };
248
/**
 * Compute-pipeline state atoms used when brw->gen is exactly 7.
 *
 * The list is currently empty, so brw_init_state() copies zero atoms for
 * the compute pipeline.
 */
static const struct brw_tracked_state *gen7_compute_atoms[] = {
};
252
253 static const struct brw_tracked_state *gen8_render_atoms[] =
254 {
255 /* Command packets: */
256 &gen8_state_base_address,
257
258 &brw_cc_vp,
259 &gen8_sf_clip_viewport,
260
261 &gen7_push_constant_space,
262 &gen7_urb,
263 &gen8_blend_state,
264 &gen6_color_calc_state,
265
266 &gen6_vs_push_constants, /* Before vs_state */
267 &gen6_gs_push_constants, /* Before gs_state */
268 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
269
270 /* Surface state setup. Must come before the VS/WM unit. The binding
271 * table upload must be last.
272 */
273 &brw_vs_pull_constants,
274 &brw_vs_ubo_surfaces,
275 &brw_vs_abo_surfaces,
276 &brw_gs_pull_constants,
277 &brw_gs_ubo_surfaces,
278 &brw_gs_abo_surfaces,
279 &brw_wm_pull_constants,
280 &brw_wm_ubo_surfaces,
281 &brw_wm_abo_surfaces,
282 &gen6_renderbuffer_surfaces,
283 &brw_texture_surfaces,
284 &brw_vs_binding_table,
285 &brw_gs_binding_table,
286 &brw_wm_binding_table,
287
288 &brw_fs_samplers,
289 &brw_vs_samplers,
290 &brw_gs_samplers,
291 &gen8_multisample_state,
292
293 &gen8_disable_stages,
294 &gen8_vs_state,
295 &gen8_gs_state,
296 &gen8_sol_state,
297 &gen6_clip_state,
298 &gen8_raster_state,
299 &gen8_sbe_state,
300 &gen8_sf_state,
301 &gen8_ps_blend,
302 &gen8_ps_extra,
303 &gen8_ps_state,
304 &gen8_wm_depth_stencil,
305 &gen8_wm_state,
306
307 &gen6_scissor_state,
308
309 &gen7_depthbuffer,
310
311 &brw_polygon_stipple,
312 &brw_polygon_stipple_offset,
313
314 &brw_line_stipple,
315 &brw_aa_line_parameters,
316
317 &brw_drawing_rect,
318
319 &gen8_vf_topology,
320
321 &brw_indices,
322 &gen8_index_buffer,
323 &gen8_vertices,
324
325 &haswell_cut_index,
326 &gen8_pma_fix,
327 };
328
/**
 * Compute-pipeline state atoms used when brw->gen is 8 or higher.
 *
 * The list is currently empty, so brw_init_state() copies zero atoms for
 * the compute pipeline.
 */
static const struct brw_tracked_state *gen8_compute_atoms[] = {
};
332
333 static void
334 brw_upload_initial_gpu_state(struct brw_context *brw)
335 {
336 /* On platforms with hardware contexts, we can set our initial GPU state
337 * right away rather than doing it via state atoms. This saves a small
338 * amount of overhead on every draw call.
339 */
340 if (!brw->hw_ctx)
341 return;
342
343 if (brw->gen == 6)
344 intel_emit_post_sync_nonzero_flush(brw);
345
346 brw_upload_invariant_state(brw);
347
348 /* Recommended optimization for Victim Cache eviction in pixel backend. */
349 if (brw->gen >= 9) {
350 BEGIN_BATCH(3);
351 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
352 OUT_BATCH(GEN7_CACHE_MODE_1);
353 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
354 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
355 ADVANCE_BATCH();
356 }
357
358 if (brw->gen >= 8) {
359 gen8_emit_3dstate_sample_pattern(brw);
360 }
361 }
362
363 static inline const struct brw_tracked_state *
364 brw_get_pipeline_atoms(struct brw_context *brw,
365 enum brw_pipeline pipeline)
366 {
367 switch (pipeline) {
368 case BRW_RENDER_PIPELINE:
369 return brw->render_atoms;
370 case BRW_COMPUTE_PIPELINE:
371 return brw->compute_atoms;
372 default:
373 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
374 unreachable("Unsupported pipeline");
375 return NULL;
376 }
377 }
378
379 static void
380 brw_copy_pipeline_atoms(struct brw_context *brw,
381 enum brw_pipeline pipeline,
382 const struct brw_tracked_state **atoms,
383 int num_atoms)
384 {
385 /* This is to work around brw_context::atoms being declared const. We want
386 * it to be const, but it needs to be initialized somehow!
387 */
388 struct brw_tracked_state *context_atoms =
389 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
390
391 for (int i = 0; i < num_atoms; i++) {
392 context_atoms[i] = *atoms[i];
393 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
394 assert(context_atoms[i].emit);
395 }
396
397 brw->num_atoms[pipeline] = num_atoms;
398 }
399
400 void brw_init_state( struct brw_context *brw )
401 {
402 struct gl_context *ctx = &brw->ctx;
403
404 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
405 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
406 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
407 ARRAY_SIZE(brw->render_atoms));
408 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
409 ARRAY_SIZE(brw->render_atoms));
410 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
411 ARRAY_SIZE(brw->compute_atoms));
412 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
413 ARRAY_SIZE(brw->compute_atoms));
414
415 brw_init_caches(brw);
416
417 if (brw->gen >= 8) {
418 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
419 gen8_render_atoms,
420 ARRAY_SIZE(gen8_render_atoms));
421 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
422 gen8_compute_atoms,
423 ARRAY_SIZE(gen8_compute_atoms));
424 } else if (brw->gen == 7) {
425 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
426 gen7_render_atoms,
427 ARRAY_SIZE(gen7_render_atoms));
428 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
429 gen7_compute_atoms,
430 ARRAY_SIZE(gen7_compute_atoms));
431 } else if (brw->gen == 6) {
432 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
433 gen6_atoms, ARRAY_SIZE(gen6_atoms));
434 } else {
435 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
436 gen4_atoms, ARRAY_SIZE(gen4_atoms));
437 }
438
439 brw_upload_initial_gpu_state(brw);
440
441 brw->NewGLState = ~0;
442 brw->ctx.NewDriverState = ~0ull;
443
444 /* ~0 is a nonsensical value which won't match anything we program, so
445 * the programming will take effect on the first time around.
446 */
447 brw->pma_stall_bits = ~0;
448
449 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
450 * dirty flags.
451 */
452 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
453
454 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
455 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
456 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
457 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
458 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
459 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
460 }
461
462
/**
 * Tear down what brw_init_state() set up; currently this only destroys
 * the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
467
468 /***********************************************************************
469 */
470
471 static bool
472 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
473 {
474 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
475 }
476
477 static void accumulate_state( struct brw_state_flags *a,
478 const struct brw_state_flags *b )
479 {
480 a->mesa |= b->mesa;
481 a->brw |= b->brw;
482 }
483
484
485 static void xor_states( struct brw_state_flags *result,
486 const struct brw_state_flags *a,
487 const struct brw_state_flags *b )
488 {
489 result->mesa = a->mesa ^ b->mesa;
490 result->brw = a->brw ^ b->brw;
491 }
492
493 struct dirty_bit_map {
494 uint64_t bit;
495 char *name;
496 uint32_t count;
497 };
498
499 #define DEFINE_BIT(name) {name, #name, 0}
500
501 static struct dirty_bit_map mesa_bits[] = {
502 DEFINE_BIT(_NEW_MODELVIEW),
503 DEFINE_BIT(_NEW_PROJECTION),
504 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
505 DEFINE_BIT(_NEW_COLOR),
506 DEFINE_BIT(_NEW_DEPTH),
507 DEFINE_BIT(_NEW_EVAL),
508 DEFINE_BIT(_NEW_FOG),
509 DEFINE_BIT(_NEW_HINT),
510 DEFINE_BIT(_NEW_LIGHT),
511 DEFINE_BIT(_NEW_LINE),
512 DEFINE_BIT(_NEW_PIXEL),
513 DEFINE_BIT(_NEW_POINT),
514 DEFINE_BIT(_NEW_POLYGON),
515 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
516 DEFINE_BIT(_NEW_SCISSOR),
517 DEFINE_BIT(_NEW_STENCIL),
518 DEFINE_BIT(_NEW_TEXTURE),
519 DEFINE_BIT(_NEW_TRANSFORM),
520 DEFINE_BIT(_NEW_VIEWPORT),
521 DEFINE_BIT(_NEW_ARRAY),
522 DEFINE_BIT(_NEW_RENDERMODE),
523 DEFINE_BIT(_NEW_BUFFERS),
524 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
525 DEFINE_BIT(_NEW_MULTISAMPLE),
526 DEFINE_BIT(_NEW_TRACK_MATRIX),
527 DEFINE_BIT(_NEW_PROGRAM),
528 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
529 DEFINE_BIT(_NEW_BUFFER_OBJECT),
530 DEFINE_BIT(_NEW_FRAG_CLAMP),
531 /* Avoid sign extension problems. */
532 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
533 {0, 0, 0}
534 };
535
536 static struct dirty_bit_map brw_bits[] = {
537 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
538 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
539 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
540 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
541 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
542 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
543 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
544 DEFINE_BIT(BRW_NEW_URB_FENCE),
545 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
546 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
547 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
548 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
549 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
550 DEFINE_BIT(BRW_NEW_PRIMITIVE),
551 DEFINE_BIT(BRW_NEW_CONTEXT),
552 DEFINE_BIT(BRW_NEW_PSP),
553 DEFINE_BIT(BRW_NEW_SURFACES),
554 DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
555 DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
556 DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
557 DEFINE_BIT(BRW_NEW_INDICES),
558 DEFINE_BIT(BRW_NEW_VERTICES),
559 DEFINE_BIT(BRW_NEW_BATCH),
560 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
561 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
562 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
563 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
564 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
565 DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
566 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
567 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
568 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
569 DEFINE_BIT(BRW_NEW_STATS_WM),
570 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
571 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
572 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
573 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
574 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
575 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
576 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
577 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
578 DEFINE_BIT(BRW_NEW_CC_VP),
579 DEFINE_BIT(BRW_NEW_SF_VP),
580 DEFINE_BIT(BRW_NEW_CLIP_VP),
581 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
582 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
583 {0, 0, 0}
584 };
585
586 static void
587 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
588 {
589 for (int i = 0; bit_map[i].bit != 0; i++) {
590 if (bit_map[i].bit & bits)
591 bit_map[i].count++;
592 }
593 }
594
595 static void
596 brw_print_dirty_count(struct dirty_bit_map *bit_map)
597 {
598 for (int i = 0; bit_map[i].bit != 0; i++) {
599 if (bit_map[i].count > 1) {
600 fprintf(stderr, "0x%016lx: %12d (%s)\n",
601 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
602 }
603 }
604 }
605
606 static inline void
607 brw_upload_programs(struct brw_context *brw,
608 enum brw_pipeline pipeline)
609 {
610 if (pipeline == BRW_RENDER_PIPELINE) {
611 brw_upload_vs_prog(brw);
612
613 if (brw->gen < 6)
614 brw_upload_ff_gs_prog(brw);
615 else
616 brw_upload_gs_prog(brw);
617
618 brw_upload_wm_prog(brw);
619 }
620 }
621
622 static inline void
623 merge_ctx_state(struct brw_context *brw,
624 struct brw_state_flags *state)
625 {
626 state->mesa |= brw->NewGLState;
627 assert(brw->state.dirty.mesa == 0);
628 state->brw |= brw->ctx.NewDriverState;
629 assert(brw->state.dirty.brw == 0ull);
630 }
631
632 static inline void
633 check_and_emit_atom(struct brw_context *brw,
634 struct brw_state_flags *state,
635 const struct brw_tracked_state *atom)
636 {
637 if (check_state(state, &atom->dirty)) {
638 atom->emit(brw);
639 merge_ctx_state(brw, state);
640 }
641 }
642
643 static inline void
644 brw_upload_pipeline_state(struct brw_context *brw,
645 enum brw_pipeline pipeline)
646 {
647 struct gl_context *ctx = &brw->ctx;
648 struct brw_state_flags *brw_state = &brw->state.dirty;
649 int i;
650 static int dirty_count = 0;
651 struct brw_state_flags state = brw->state.pipelines[pipeline];
652
653 if (0) {
654 /* Always re-emit all state. */
655 brw->NewGLState = ~0;
656 ctx->NewDriverState = ~0ull;
657 }
658
659 if (pipeline == BRW_RENDER_PIPELINE) {
660 if (brw->fragment_program != ctx->FragmentProgram._Current) {
661 brw->fragment_program = ctx->FragmentProgram._Current;
662 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
663 }
664
665 if (brw->geometry_program != ctx->GeometryProgram._Current) {
666 brw->geometry_program = ctx->GeometryProgram._Current;
667 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
668 }
669
670 if (brw->vertex_program != ctx->VertexProgram._Current) {
671 brw->vertex_program = ctx->VertexProgram._Current;
672 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
673 }
674 }
675
676 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
677 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
678 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
679 }
680
681 if (brw->num_samples != ctx->DrawBuffer->Visual.samples) {
682 brw->num_samples = ctx->DrawBuffer->Visual.samples;
683 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
684 }
685
686 /* Exit early if no state is flagged as dirty */
687 merge_ctx_state(brw, &state);
688 if ((state.mesa | state.brw) == 0)
689 return;
690
691 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
692 if (brw->gen == 6)
693 intel_emit_post_sync_nonzero_flush(brw);
694
695 brw_upload_programs(brw, pipeline);
696 merge_ctx_state(brw, &state);
697
698 const struct brw_tracked_state *atoms =
699 brw_get_pipeline_atoms(brw, pipeline);
700 const int num_atoms = brw->num_atoms[pipeline];
701
702 if (unlikely(INTEL_DEBUG)) {
703 /* Debug version which enforces various sanity checks on the
704 * state flags which are generated and checked to help ensure
705 * state atoms are ordered correctly in the list.
706 */
707 struct brw_state_flags examined, prev;
708 memset(&examined, 0, sizeof(examined));
709 prev = state;
710
711 for (i = 0; i < num_atoms; i++) {
712 const struct brw_tracked_state *atom = &atoms[i];
713 struct brw_state_flags generated;
714
715 check_and_emit_atom(brw, &state, atom);
716
717 accumulate_state(&examined, &atom->dirty);
718
719 /* generated = (prev ^ state)
720 * if (examined & generated)
721 * fail;
722 */
723 xor_states(&generated, &prev, &state);
724 assert(!check_state(&examined, &generated));
725 prev = state;
726 }
727 }
728 else {
729 for (i = 0; i < num_atoms; i++) {
730 const struct brw_tracked_state *atom = &atoms[i];
731
732 check_and_emit_atom(brw, &state, atom);
733 }
734 }
735
736 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
737 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
738
739 brw_update_dirty_count(mesa_bits, state.mesa);
740 brw_update_dirty_count(brw_bits, state.brw);
741 if (dirty_count++ % 1000 == 0) {
742 brw_print_dirty_count(mesa_bits);
743 brw_print_dirty_count(brw_bits);
744 fprintf(stderr, "\n");
745 }
746 }
747 }
748
749 /***********************************************************************
750 * Emit all state:
751 */
752 void brw_upload_render_state(struct brw_context *brw)
753 {
754 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
755 }
756
757 static inline void
758 brw_pipeline_state_finished(struct brw_context *brw,
759 enum brw_pipeline pipeline)
760 {
761 struct brw_state_flags *state = &brw->state.dirty;
762
763 /* Save all dirty state into the other pipelines */
764 for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
765 if (i != pipeline) {
766 brw->state.pipelines[i].mesa |= brw->NewGLState;
767 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
768 } else {
769 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
770 }
771 }
772
773 brw->NewGLState = 0;
774 brw->ctx.NewDriverState = 0ull;
775 memset(state, 0, sizeof(*state));
776 }
777
778 /**
779 * Clear dirty bits to account for the fact that the state emitted by
780 * brw_upload_render_state() has been committed to the hardware. This is a
781 * separate call from brw_upload_render_state() because it's possible that
782 * after the call to brw_upload_render_state(), we will discover that we've
783 * run out of aperture space, and need to rewind the batch buffer to the state
784 * it had before the brw_upload_render_state() call.
785 */
786 void
787 brw_render_state_finished(struct brw_context *brw)
788 {
789 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
790 }
791
792 void
793 brw_upload_compute_state(struct brw_context *brw)
794 {
795 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
796 }
797
798 void
799 brw_compute_state_finished(struct brw_context *brw)
800 {
801 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
802 }