79b8301954e4164e5edb8efd6515b8cf9497fc7b
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_push_constant_space,
190 &gen7_urb,
191 &gen6_blend_state, /* must do before cc unit */
192 &gen6_color_calc_state, /* must do before cc unit */
193 &gen6_depth_stencil_state, /* must do before cc unit */
194
195 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
196
197 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
198 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
199 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
200
201 &gen6_vs_push_constants, /* Before vs_state */
202 &gen6_gs_push_constants, /* Before gs_state */
203 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
204
205 /* Surface state setup. Must come before the VS/WM unit. The binding
206 * table upload must be last.
207 */
208 &brw_vs_pull_constants,
209 &brw_vs_ubo_surfaces,
210 &brw_vs_abo_surfaces,
211 &brw_gs_pull_constants,
212 &brw_gs_ubo_surfaces,
213 &brw_gs_abo_surfaces,
214 &brw_wm_pull_constants,
215 &brw_wm_ubo_surfaces,
216 &brw_wm_abo_surfaces,
217 &gen6_renderbuffer_surfaces,
218 &brw_texture_surfaces,
219 &brw_vs_binding_table,
220 &brw_gs_binding_table,
221 &brw_wm_binding_table,
222
223 &brw_fs_samplers,
224 &brw_vs_samplers,
225 &brw_gs_samplers,
226 &gen6_multisample_state,
227
228 &gen7_disable_stages,
229 &gen7_vs_state,
230 &gen7_gs_state,
231 &gen7_sol_state,
232 &gen7_clip_state,
233 &gen7_sbe_state,
234 &gen7_sf_state,
235 &gen7_wm_state,
236 &gen7_ps_state,
237
238 &gen6_scissor_state,
239
240 &gen7_depthbuffer,
241
242 &brw_polygon_stipple,
243 &brw_polygon_stipple_offset,
244
245 &brw_line_stipple,
246 &brw_aa_line_parameters,
247
248 &brw_drawing_rect,
249
250 &brw_indices, /* must come before brw_vertices */
251 &brw_index_buffer,
252 &brw_vertices,
253
254 &haswell_cut_index,
255 };
256
257 static const struct brw_tracked_state *gen7_compute_atoms[] =
258 {
259 &brw_state_base_address,
260 &brw_cs_image_surfaces,
261 &gen7_cs_push_constants,
262 &brw_cs_ubo_surfaces,
263 &brw_cs_abo_surfaces,
264 &brw_texture_surfaces,
265 &brw_cs_work_groups_surface,
266 &brw_cs_state,
267 };
268
269 static const struct brw_tracked_state *gen8_render_atoms[] =
270 {
271 /* Command packets: */
272 &gen8_state_base_address,
273
274 &brw_cc_vp,
275 &gen8_sf_clip_viewport,
276
277 &gen7_push_constant_space,
278 &gen7_urb,
279 &gen8_blend_state,
280 &gen6_color_calc_state,
281
282 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
283
284 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
285 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
286 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
287
288 &gen6_vs_push_constants, /* Before vs_state */
289 &gen6_gs_push_constants, /* Before gs_state */
290 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
291
292 /* Surface state setup. Must come before the VS/WM unit. The binding
293 * table upload must be last.
294 */
295 &brw_vs_pull_constants,
296 &brw_vs_ubo_surfaces,
297 &brw_vs_abo_surfaces,
298 &brw_gs_pull_constants,
299 &brw_gs_ubo_surfaces,
300 &brw_gs_abo_surfaces,
301 &brw_wm_pull_constants,
302 &brw_wm_ubo_surfaces,
303 &brw_wm_abo_surfaces,
304 &gen6_renderbuffer_surfaces,
305 &brw_texture_surfaces,
306 &brw_vs_binding_table,
307 &brw_gs_binding_table,
308 &brw_wm_binding_table,
309
310 &brw_fs_samplers,
311 &brw_vs_samplers,
312 &brw_gs_samplers,
313 &gen8_multisample_state,
314
315 &gen8_disable_stages,
316 &gen8_vs_state,
317 &gen8_gs_state,
318 &gen8_sol_state,
319 &gen6_clip_state,
320 &gen8_raster_state,
321 &gen8_sbe_state,
322 &gen8_sf_state,
323 &gen8_ps_blend,
324 &gen8_ps_extra,
325 &gen8_ps_state,
326 &gen8_wm_depth_stencil,
327 &gen8_wm_state,
328
329 &gen6_scissor_state,
330
331 &gen7_depthbuffer,
332
333 &brw_polygon_stipple,
334 &brw_polygon_stipple_offset,
335
336 &brw_line_stipple,
337 &brw_aa_line_parameters,
338
339 &brw_drawing_rect,
340
341 &gen8_vf_topology,
342
343 &brw_indices,
344 &gen8_index_buffer,
345 &gen8_vertices,
346
347 &haswell_cut_index,
348 &gen8_pma_fix,
349 };
350
351 static const struct brw_tracked_state *gen8_compute_atoms[] =
352 {
353 &gen8_state_base_address,
354 &brw_cs_image_surfaces,
355 &gen7_cs_push_constants,
356 &brw_cs_ubo_surfaces,
357 &brw_cs_abo_surfaces,
358 &brw_texture_surfaces,
359 &brw_cs_work_groups_surface,
360 &brw_cs_state,
361 };
362
363 static void
364 brw_upload_initial_gpu_state(struct brw_context *brw)
365 {
366 /* On platforms with hardware contexts, we can set our initial GPU state
367 * right away rather than doing it via state atoms. This saves a small
368 * amount of overhead on every draw call.
369 */
370 if (!brw->hw_ctx)
371 return;
372
373 if (brw->gen == 6)
374 brw_emit_post_sync_nonzero_flush(brw);
375
376 brw_upload_invariant_state(brw);
377
378 /* Recommended optimization for Victim Cache eviction in pixel backend. */
379 if (brw->gen >= 9) {
380 BEGIN_BATCH(3);
381 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
382 OUT_BATCH(GEN7_CACHE_MODE_1);
383 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
384 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
385 ADVANCE_BATCH();
386 }
387
388 if (brw->gen >= 8) {
389 gen8_emit_3dstate_sample_pattern(brw);
390 }
391 }
392
393 static inline const struct brw_tracked_state *
394 brw_get_pipeline_atoms(struct brw_context *brw,
395 enum brw_pipeline pipeline)
396 {
397 switch (pipeline) {
398 case BRW_RENDER_PIPELINE:
399 return brw->render_atoms;
400 case BRW_COMPUTE_PIPELINE:
401 return brw->compute_atoms;
402 default:
403 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
404 unreachable("Unsupported pipeline");
405 return NULL;
406 }
407 }
408
409 static void
410 brw_copy_pipeline_atoms(struct brw_context *brw,
411 enum brw_pipeline pipeline,
412 const struct brw_tracked_state **atoms,
413 int num_atoms)
414 {
415 /* This is to work around brw_context::atoms being declared const. We want
416 * it to be const, but it needs to be initialized somehow!
417 */
418 struct brw_tracked_state *context_atoms =
419 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
420
421 for (int i = 0; i < num_atoms; i++) {
422 context_atoms[i] = *atoms[i];
423 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
424 assert(context_atoms[i].emit);
425 }
426
427 brw->num_atoms[pipeline] = num_atoms;
428 }
429
430 void brw_init_state( struct brw_context *brw )
431 {
432 struct gl_context *ctx = &brw->ctx;
433
434 /* Force the first brw_select_pipeline to emit pipeline select */
435 brw->last_pipeline = BRW_NUM_PIPELINES;
436
437 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
438 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
439 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
440 ARRAY_SIZE(brw->render_atoms));
441 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
442 ARRAY_SIZE(brw->render_atoms));
443 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
444 ARRAY_SIZE(brw->compute_atoms));
445 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
446 ARRAY_SIZE(brw->compute_atoms));
447
448 brw_init_caches(brw);
449
450 if (brw->gen >= 8) {
451 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
452 gen8_render_atoms,
453 ARRAY_SIZE(gen8_render_atoms));
454 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
455 gen8_compute_atoms,
456 ARRAY_SIZE(gen8_compute_atoms));
457 } else if (brw->gen == 7) {
458 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
459 gen7_render_atoms,
460 ARRAY_SIZE(gen7_render_atoms));
461 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
462 gen7_compute_atoms,
463 ARRAY_SIZE(gen7_compute_atoms));
464 } else if (brw->gen == 6) {
465 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
466 gen6_atoms, ARRAY_SIZE(gen6_atoms));
467 } else {
468 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
469 gen4_atoms, ARRAY_SIZE(gen4_atoms));
470 }
471
472 brw_upload_initial_gpu_state(brw);
473
474 brw->NewGLState = ~0;
475 brw->ctx.NewDriverState = ~0ull;
476
477 /* ~0 is a nonsensical value which won't match anything we program, so
478 * the programming will take effect on the first time around.
479 */
480 brw->pma_stall_bits = ~0;
481
482 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
483 * dirty flags.
484 */
485 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
486
487 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
488 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
489 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
490 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
491 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
492 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
493 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
494 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
495 }
496
497
/**
 * Tear down the state set up by brw_init_state(); currently this only
 * destroys the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
502
503 /***********************************************************************
504 */
505
506 static bool
507 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
508 {
509 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
510 }
511
512 static void accumulate_state( struct brw_state_flags *a,
513 const struct brw_state_flags *b )
514 {
515 a->mesa |= b->mesa;
516 a->brw |= b->brw;
517 }
518
519
520 static void xor_states( struct brw_state_flags *result,
521 const struct brw_state_flags *a,
522 const struct brw_state_flags *b )
523 {
524 result->mesa = a->mesa ^ b->mesa;
525 result->brw = a->brw ^ b->brw;
526 }
527
/**
 * One entry of a dirty-bit statistics table.
 *
 * Tables of these entries are terminated by an entry whose bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;        /* flag value this entry tracks */
   const char *name;    /* flag name; only ever points at a string literal */
   uint32_t count;      /* number of uploads in which the flag was set */
};

/* Build a table entry from a flag macro: its value, its spelling as a
 * string, and a zero count.  The argument is parenthesized so that any
 * expression can be passed safely.
 */
#define DEFINE_BIT(flag) { (flag), #flag, 0 }
535
536 static struct dirty_bit_map mesa_bits[] = {
537 DEFINE_BIT(_NEW_MODELVIEW),
538 DEFINE_BIT(_NEW_PROJECTION),
539 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
540 DEFINE_BIT(_NEW_COLOR),
541 DEFINE_BIT(_NEW_DEPTH),
542 DEFINE_BIT(_NEW_EVAL),
543 DEFINE_BIT(_NEW_FOG),
544 DEFINE_BIT(_NEW_HINT),
545 DEFINE_BIT(_NEW_LIGHT),
546 DEFINE_BIT(_NEW_LINE),
547 DEFINE_BIT(_NEW_PIXEL),
548 DEFINE_BIT(_NEW_POINT),
549 DEFINE_BIT(_NEW_POLYGON),
550 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
551 DEFINE_BIT(_NEW_SCISSOR),
552 DEFINE_BIT(_NEW_STENCIL),
553 DEFINE_BIT(_NEW_TEXTURE),
554 DEFINE_BIT(_NEW_TRANSFORM),
555 DEFINE_BIT(_NEW_VIEWPORT),
556 DEFINE_BIT(_NEW_ARRAY),
557 DEFINE_BIT(_NEW_RENDERMODE),
558 DEFINE_BIT(_NEW_BUFFERS),
559 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
560 DEFINE_BIT(_NEW_MULTISAMPLE),
561 DEFINE_BIT(_NEW_TRACK_MATRIX),
562 DEFINE_BIT(_NEW_PROGRAM),
563 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
564 DEFINE_BIT(_NEW_BUFFER_OBJECT),
565 DEFINE_BIT(_NEW_FRAG_CLAMP),
566 /* Avoid sign extension problems. */
567 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
568 {0, 0, 0}
569 };
570
571 static struct dirty_bit_map brw_bits[] = {
572 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
573 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
574 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
575 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
576 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
577 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
578 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
579 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
580 DEFINE_BIT(BRW_NEW_URB_FENCE),
581 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
582 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
583 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
584 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
585 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
586 DEFINE_BIT(BRW_NEW_PRIMITIVE),
587 DEFINE_BIT(BRW_NEW_CONTEXT),
588 DEFINE_BIT(BRW_NEW_PSP),
589 DEFINE_BIT(BRW_NEW_SURFACES),
590 DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
591 DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
592 DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
593 DEFINE_BIT(BRW_NEW_INDICES),
594 DEFINE_BIT(BRW_NEW_VERTICES),
595 DEFINE_BIT(BRW_NEW_BATCH),
596 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
597 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
598 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
599 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
600 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
601 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
602 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
603 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
604 DEFINE_BIT(BRW_NEW_STATS_WM),
605 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
606 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
607 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
608 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
609 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
610 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
611 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
612 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
613 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
614 DEFINE_BIT(BRW_NEW_CC_VP),
615 DEFINE_BIT(BRW_NEW_SF_VP),
616 DEFINE_BIT(BRW_NEW_CLIP_VP),
617 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
618 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
619 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
620 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
621 {0, 0, 0}
622 };
623
624 static void
625 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
626 {
627 for (int i = 0; bit_map[i].bit != 0; i++) {
628 if (bit_map[i].bit & bits)
629 bit_map[i].count++;
630 }
631 }
632
633 static void
634 brw_print_dirty_count(struct dirty_bit_map *bit_map)
635 {
636 for (int i = 0; bit_map[i].bit != 0; i++) {
637 if (bit_map[i].count > 1) {
638 fprintf(stderr, "0x%016lx: %12d (%s)\n",
639 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
640 }
641 }
642 }
643
644 static inline void
645 brw_upload_programs(struct brw_context *brw,
646 enum brw_pipeline pipeline)
647 {
648 if (pipeline == BRW_RENDER_PIPELINE) {
649 brw_upload_vs_prog(brw);
650
651 if (brw->gen < 6)
652 brw_upload_ff_gs_prog(brw);
653 else
654 brw_upload_gs_prog(brw);
655
656 /* Update the VUE map for data exiting the GS stage of the pipeline.
657 * This comes from the last enabled shader stage.
658 */
659 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
660 bool old_separate = brw->vue_map_geom_out.separate;
661 if (brw->geometry_program)
662 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
663 else
664 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
665
666 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
667 if (old_slots != brw->vue_map_geom_out.slots_valid ||
668 old_separate != brw->vue_map_geom_out.separate)
669 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
670
671 brw_upload_wm_prog(brw);
672 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
673 brw_upload_cs_prog(brw);
674 }
675 }
676
677 static inline void
678 merge_ctx_state(struct brw_context *brw,
679 struct brw_state_flags *state)
680 {
681 state->mesa |= brw->NewGLState;
682 state->brw |= brw->ctx.NewDriverState;
683 }
684
685 static inline void
686 check_and_emit_atom(struct brw_context *brw,
687 struct brw_state_flags *state,
688 const struct brw_tracked_state *atom)
689 {
690 if (check_state(state, &atom->dirty)) {
691 atom->emit(brw);
692 merge_ctx_state(brw, state);
693 }
694 }
695
696 static inline void
697 brw_upload_pipeline_state(struct brw_context *brw,
698 enum brw_pipeline pipeline)
699 {
700 struct gl_context *ctx = &brw->ctx;
701 int i;
702 static int dirty_count = 0;
703 struct brw_state_flags state = brw->state.pipelines[pipeline];
704 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
705
706 brw_select_pipeline(brw, pipeline);
707
708 if (0) {
709 /* Always re-emit all state. */
710 brw->NewGLState = ~0;
711 ctx->NewDriverState = ~0ull;
712 }
713
714 if (pipeline == BRW_RENDER_PIPELINE) {
715 if (brw->fragment_program != ctx->FragmentProgram._Current) {
716 brw->fragment_program = ctx->FragmentProgram._Current;
717 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
718 }
719
720 if (brw->geometry_program != ctx->GeometryProgram._Current) {
721 brw->geometry_program = ctx->GeometryProgram._Current;
722 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
723 }
724
725 if (brw->vertex_program != ctx->VertexProgram._Current) {
726 brw->vertex_program = ctx->VertexProgram._Current;
727 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
728 }
729 }
730
731 if (brw->compute_program != ctx->ComputeProgram._Current) {
732 brw->compute_program = ctx->ComputeProgram._Current;
733 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
734 }
735
736 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
737 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
738 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
739 }
740
741 if (brw->num_samples != fb_samples) {
742 brw->num_samples = fb_samples;
743 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
744 }
745
746 /* Exit early if no state is flagged as dirty */
747 merge_ctx_state(brw, &state);
748 if ((state.mesa | state.brw) == 0)
749 return;
750
751 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
752 if (brw->gen == 6)
753 brw_emit_post_sync_nonzero_flush(brw);
754
755 brw_upload_programs(brw, pipeline);
756 merge_ctx_state(brw, &state);
757
758 const struct brw_tracked_state *atoms =
759 brw_get_pipeline_atoms(brw, pipeline);
760 const int num_atoms = brw->num_atoms[pipeline];
761
762 if (unlikely(INTEL_DEBUG)) {
763 /* Debug version which enforces various sanity checks on the
764 * state flags which are generated and checked to help ensure
765 * state atoms are ordered correctly in the list.
766 */
767 struct brw_state_flags examined, prev;
768 memset(&examined, 0, sizeof(examined));
769 prev = state;
770
771 for (i = 0; i < num_atoms; i++) {
772 const struct brw_tracked_state *atom = &atoms[i];
773 struct brw_state_flags generated;
774
775 check_and_emit_atom(brw, &state, atom);
776
777 accumulate_state(&examined, &atom->dirty);
778
779 /* generated = (prev ^ state)
780 * if (examined & generated)
781 * fail;
782 */
783 xor_states(&generated, &prev, &state);
784 assert(!check_state(&examined, &generated));
785 prev = state;
786 }
787 }
788 else {
789 for (i = 0; i < num_atoms; i++) {
790 const struct brw_tracked_state *atom = &atoms[i];
791
792 check_and_emit_atom(brw, &state, atom);
793 }
794 }
795
796 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
797 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
798
799 brw_update_dirty_count(mesa_bits, state.mesa);
800 brw_update_dirty_count(brw_bits, state.brw);
801 if (dirty_count++ % 1000 == 0) {
802 brw_print_dirty_count(mesa_bits);
803 brw_print_dirty_count(brw_bits);
804 fprintf(stderr, "\n");
805 }
806 }
807 }
808
809 /***********************************************************************
810 * Emit all state:
811 */
812 void brw_upload_render_state(struct brw_context *brw)
813 {
814 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
815 }
816
817 static inline void
818 brw_pipeline_state_finished(struct brw_context *brw,
819 enum brw_pipeline pipeline)
820 {
821 /* Save all dirty state into the other pipelines */
822 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
823 if (i != pipeline) {
824 brw->state.pipelines[i].mesa |= brw->NewGLState;
825 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
826 } else {
827 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
828 }
829 }
830
831 brw->NewGLState = 0;
832 brw->ctx.NewDriverState = 0ull;
833 }
834
835 /**
836 * Clear dirty bits to account for the fact that the state emitted by
837 * brw_upload_render_state() has been committed to the hardware. This is a
838 * separate call from brw_upload_render_state() because it's possible that
839 * after the call to brw_upload_render_state(), we will discover that we've
840 * run out of aperture space, and need to rewind the batch buffer to the state
841 * it had before the brw_upload_render_state() call.
842 */
843 void
844 brw_render_state_finished(struct brw_context *brw)
845 {
846 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
847 }
848
849 void
850 brw_upload_compute_state(struct brw_context *brw)
851 {
852 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
853 }
854
855 void
856 brw_compute_state_finished(struct brw_context *brw)
857 {
858 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
859 }