b2ca9c2c0e7be2c851b486c4179fab8df6dd1909
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
/**
 * Render-pipeline state atoms used when brw->gen is below 6
 * (brw_init_state() installs this list in its final "else" branch).
 *
 * The atoms are copied into the context in array order and are later
 * checked and emitted in that same order, so the ordering constraints
 * noted inline must be preserved when editing this list.
 */
static const struct brw_tracked_state *gen4_atoms[] =
{
   &brw_interpolation_map,

   &brw_clip_prog, /* must do before state base address */
   &brw_sf_prog, /* must do before state base address */

   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup. Must come before the VS/WM unit. The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit, /* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,
   &brw_state_base_address,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};
109
/**
 * Render-pipeline state atoms used when brw->gen == 6 (see
 * brw_init_state()).  No compute atom list is installed for this
 * generation.
 *
 * Atoms are checked and emitted in array order, so the ordering
 * constraints noted inline must be preserved.
 */
static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_clip_vp,
   &gen6_sf_vp,

   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen6_viewport_state, /* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup. Must come before the VS/WM unit. The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};
178
/**
 * Render-pipeline state atoms used when brw->gen == 7 (see
 * brw_init_state()).
 *
 * Atoms are checked and emitted in array order, so the ordering
 * constraints noted inline must be preserved.
 */
static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup. Must come before the VS/WM unit. The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_disable_stages,
   &gen7_vs_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen7_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};
256
/**
 * Compute-pipeline state atoms used when brw->gen == 7 (see
 * brw_init_state()).  Atoms are checked and emitted in array order.
 */
static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &brw_state_base_address,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_abo_surfaces,
   &brw_cs_state,
};
265
/**
 * Render-pipeline state atoms used when brw->gen >= 8 (see
 * brw_init_state()).
 *
 * Atoms are checked and emitted in array order, so the ordering
 * constraints noted inline must be preserved.
 */
static const struct brw_tracked_state *gen8_render_atoms[] =
{
   /* Command packets: */
   &gen8_state_base_address,

   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup. Must come before the VS/WM unit. The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_disable_stages,
   &gen8_vs_state,
   &gen8_gs_state,
   &gen8_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};
347
/**
 * Compute-pipeline state atoms used when brw->gen >= 8 (see
 * brw_init_state()).  Atoms are checked and emitted in array order.
 */
static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen8_state_base_address,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_abo_surfaces,
   &brw_cs_state,
};
356
357 static void
358 brw_upload_initial_gpu_state(struct brw_context *brw)
359 {
360 /* On platforms with hardware contexts, we can set our initial GPU state
361 * right away rather than doing it via state atoms. This saves a small
362 * amount of overhead on every draw call.
363 */
364 if (!brw->hw_ctx)
365 return;
366
367 if (brw->gen == 6)
368 brw_emit_post_sync_nonzero_flush(brw);
369
370 brw_upload_invariant_state(brw);
371
372 /* Recommended optimization for Victim Cache eviction in pixel backend. */
373 if (brw->gen >= 9) {
374 BEGIN_BATCH(3);
375 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
376 OUT_BATCH(GEN7_CACHE_MODE_1);
377 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
378 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
379 ADVANCE_BATCH();
380 }
381
382 if (brw->gen >= 8) {
383 gen8_emit_3dstate_sample_pattern(brw);
384 }
385 }
386
387 static inline const struct brw_tracked_state *
388 brw_get_pipeline_atoms(struct brw_context *brw,
389 enum brw_pipeline pipeline)
390 {
391 switch (pipeline) {
392 case BRW_RENDER_PIPELINE:
393 return brw->render_atoms;
394 case BRW_COMPUTE_PIPELINE:
395 return brw->compute_atoms;
396 default:
397 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
398 unreachable("Unsupported pipeline");
399 return NULL;
400 }
401 }
402
403 static void
404 brw_copy_pipeline_atoms(struct brw_context *brw,
405 enum brw_pipeline pipeline,
406 const struct brw_tracked_state **atoms,
407 int num_atoms)
408 {
409 /* This is to work around brw_context::atoms being declared const. We want
410 * it to be const, but it needs to be initialized somehow!
411 */
412 struct brw_tracked_state *context_atoms =
413 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
414
415 for (int i = 0; i < num_atoms; i++) {
416 context_atoms[i] = *atoms[i];
417 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
418 assert(context_atoms[i].emit);
419 }
420
421 brw->num_atoms[pipeline] = num_atoms;
422 }
423
424 void brw_init_state( struct brw_context *brw )
425 {
426 struct gl_context *ctx = &brw->ctx;
427
428 /* Force the first brw_select_pipeline to emit pipeline select */
429 brw->last_pipeline = BRW_NUM_PIPELINES;
430
431 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
432 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
433 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
434 ARRAY_SIZE(brw->render_atoms));
435 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
436 ARRAY_SIZE(brw->render_atoms));
437 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
438 ARRAY_SIZE(brw->compute_atoms));
439 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
440 ARRAY_SIZE(brw->compute_atoms));
441
442 brw_init_caches(brw);
443
444 if (brw->gen >= 8) {
445 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
446 gen8_render_atoms,
447 ARRAY_SIZE(gen8_render_atoms));
448 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
449 gen8_compute_atoms,
450 ARRAY_SIZE(gen8_compute_atoms));
451 } else if (brw->gen == 7) {
452 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
453 gen7_render_atoms,
454 ARRAY_SIZE(gen7_render_atoms));
455 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
456 gen7_compute_atoms,
457 ARRAY_SIZE(gen7_compute_atoms));
458 } else if (brw->gen == 6) {
459 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
460 gen6_atoms, ARRAY_SIZE(gen6_atoms));
461 } else {
462 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
463 gen4_atoms, ARRAY_SIZE(gen4_atoms));
464 }
465
466 brw_upload_initial_gpu_state(brw);
467
468 brw->NewGLState = ~0;
469 brw->ctx.NewDriverState = ~0ull;
470
471 /* ~0 is a nonsensical value which won't match anything we program, so
472 * the programming will take effect on the first time around.
473 */
474 brw->pma_stall_bits = ~0;
475
476 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
477 * dirty flags.
478 */
479 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
480
481 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
482 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
483 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
484 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
485 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
486 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
487 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
488 }
489
490
/**
 * Tear down the state set up by brw_init_state().  Currently this only
 * destroys the caches created by brw_init_caches().
 */
void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}
495
496 /***********************************************************************
497 */
498
499 static bool
500 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
501 {
502 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
503 }
504
505 static void accumulate_state( struct brw_state_flags *a,
506 const struct brw_state_flags *b )
507 {
508 a->mesa |= b->mesa;
509 a->brw |= b->brw;
510 }
511
512
513 static void xor_states( struct brw_state_flags *result,
514 const struct brw_state_flags *a,
515 const struct brw_state_flags *b )
516 {
517 result->mesa = a->mesa ^ b->mesa;
518 result->brw = a->brw ^ b->brw;
519 }
520
/**
 * One entry of a dirty-bit statistics table.
 *
 * Tables of these entries are terminated by an entry whose \c bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;      /**< Flag value this entry tracks. */
   const char *name;  /**< Flag name as a string literal; never written. */
   uint32_t count;    /**< Number of uploads in which the flag was set. */
};

/* Build a table entry from a flag macro: its value, its spelling as a
 * string, and a zeroed counter.
 */
#define DEFINE_BIT(flag) { (flag), #flag, 0 }
528
/**
 * Hit counters for the core Mesa (_NEW_*) dirty flags, updated and
 * printed by brw_upload_pipeline_state() when DEBUG_STATE is enabled.
 * The all-zero entry at the end terminates the table.
 */
static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};
563
/**
 * Hit counters for the driver (BRW_NEW_*) dirty flags, updated and
 * printed by brw_upload_pipeline_state() when DEBUG_STATE is enabled.
 * The all-zero entry at the end terminates the table; a static assert in
 * brw_upload_pipeline_state() requires the array to have exactly
 * BRW_NUM_STATE_BITS + 1 entries.
 */
static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   {0, 0, 0}
};
616
617 static void
618 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
619 {
620 for (int i = 0; bit_map[i].bit != 0; i++) {
621 if (bit_map[i].bit & bits)
622 bit_map[i].count++;
623 }
624 }
625
626 static void
627 brw_print_dirty_count(struct dirty_bit_map *bit_map)
628 {
629 for (int i = 0; bit_map[i].bit != 0; i++) {
630 if (bit_map[i].count > 1) {
631 fprintf(stderr, "0x%016lx: %12d (%s)\n",
632 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
633 }
634 }
635 }
636
637 static inline void
638 brw_upload_programs(struct brw_context *brw,
639 enum brw_pipeline pipeline)
640 {
641 if (pipeline == BRW_RENDER_PIPELINE) {
642 brw_upload_vs_prog(brw);
643
644 if (brw->gen < 6)
645 brw_upload_ff_gs_prog(brw);
646 else
647 brw_upload_gs_prog(brw);
648
649 brw_upload_wm_prog(brw);
650 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
651 brw_upload_cs_prog(brw);
652 }
653 }
654
655 static inline void
656 merge_ctx_state(struct brw_context *brw,
657 struct brw_state_flags *state)
658 {
659 state->mesa |= brw->NewGLState;
660 state->brw |= brw->ctx.NewDriverState;
661 }
662
663 static inline void
664 check_and_emit_atom(struct brw_context *brw,
665 struct brw_state_flags *state,
666 const struct brw_tracked_state *atom)
667 {
668 if (check_state(state, &atom->dirty)) {
669 atom->emit(brw);
670 merge_ctx_state(brw, state);
671 }
672 }
673
674 static inline void
675 brw_upload_pipeline_state(struct brw_context *brw,
676 enum brw_pipeline pipeline)
677 {
678 struct gl_context *ctx = &brw->ctx;
679 int i;
680 static int dirty_count = 0;
681 struct brw_state_flags state = brw->state.pipelines[pipeline];
682 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
683
684 brw_select_pipeline(brw, pipeline);
685
686 if (0) {
687 /* Always re-emit all state. */
688 brw->NewGLState = ~0;
689 ctx->NewDriverState = ~0ull;
690 }
691
692 if (pipeline == BRW_RENDER_PIPELINE) {
693 if (brw->fragment_program != ctx->FragmentProgram._Current) {
694 brw->fragment_program = ctx->FragmentProgram._Current;
695 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
696 }
697
698 if (brw->geometry_program != ctx->GeometryProgram._Current) {
699 brw->geometry_program = ctx->GeometryProgram._Current;
700 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
701 }
702
703 if (brw->vertex_program != ctx->VertexProgram._Current) {
704 brw->vertex_program = ctx->VertexProgram._Current;
705 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
706 }
707 }
708
709 if (brw->compute_program != ctx->ComputeProgram._Current) {
710 brw->compute_program = ctx->ComputeProgram._Current;
711 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
712 }
713
714 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
715 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
716 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
717 }
718
719 if (brw->num_samples != fb_samples) {
720 brw->num_samples = fb_samples;
721 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
722 }
723
724 /* Exit early if no state is flagged as dirty */
725 merge_ctx_state(brw, &state);
726 if ((state.mesa | state.brw) == 0)
727 return;
728
729 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
730 if (brw->gen == 6)
731 brw_emit_post_sync_nonzero_flush(brw);
732
733 brw_upload_programs(brw, pipeline);
734 merge_ctx_state(brw, &state);
735
736 const struct brw_tracked_state *atoms =
737 brw_get_pipeline_atoms(brw, pipeline);
738 const int num_atoms = brw->num_atoms[pipeline];
739
740 if (unlikely(INTEL_DEBUG)) {
741 /* Debug version which enforces various sanity checks on the
742 * state flags which are generated and checked to help ensure
743 * state atoms are ordered correctly in the list.
744 */
745 struct brw_state_flags examined, prev;
746 memset(&examined, 0, sizeof(examined));
747 prev = state;
748
749 for (i = 0; i < num_atoms; i++) {
750 const struct brw_tracked_state *atom = &atoms[i];
751 struct brw_state_flags generated;
752
753 check_and_emit_atom(brw, &state, atom);
754
755 accumulate_state(&examined, &atom->dirty);
756
757 /* generated = (prev ^ state)
758 * if (examined & generated)
759 * fail;
760 */
761 xor_states(&generated, &prev, &state);
762 assert(!check_state(&examined, &generated));
763 prev = state;
764 }
765 }
766 else {
767 for (i = 0; i < num_atoms; i++) {
768 const struct brw_tracked_state *atom = &atoms[i];
769
770 check_and_emit_atom(brw, &state, atom);
771 }
772 }
773
774 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
775 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
776
777 brw_update_dirty_count(mesa_bits, state.mesa);
778 brw_update_dirty_count(brw_bits, state.brw);
779 if (dirty_count++ % 1000 == 0) {
780 brw_print_dirty_count(mesa_bits);
781 brw_print_dirty_count(brw_bits);
782 fprintf(stderr, "\n");
783 }
784 }
785 }
786
/***********************************************************************
 * Emit all state:
 */
/* Upload state for the render pipeline; thin wrapper around
 * brw_upload_pipeline_state() with BRW_RENDER_PIPELINE.
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}
794
795 static inline void
796 brw_pipeline_state_finished(struct brw_context *brw,
797 enum brw_pipeline pipeline)
798 {
799 /* Save all dirty state into the other pipelines */
800 for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
801 if (i != pipeline) {
802 brw->state.pipelines[i].mesa |= brw->NewGLState;
803 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
804 } else {
805 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
806 }
807 }
808
809 brw->NewGLState = 0;
810 brw->ctx.NewDriverState = 0ull;
811 }
812
/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware. This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 *
 * Implemented as brw_pipeline_state_finished() for BRW_RENDER_PIPELINE.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
826
/**
 * Upload state for the compute pipeline; thin wrapper around
 * brw_upload_pipeline_state() with BRW_COMPUTE_PIPELINE.
 */
void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}
832
/**
 * Compute-pipeline counterpart of brw_render_state_finished(): calls
 * brw_pipeline_state_finished() for BRW_COMPUTE_PIPELINE.
 */
void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}