i965/nir/vec4: Prepare source and destination registers for ALU operations
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_push_constant_space,
190 &gen7_urb,
191 &gen6_blend_state, /* must do before cc unit */
192 &gen6_color_calc_state, /* must do before cc unit */
193 &gen6_depth_stencil_state, /* must do before cc unit */
194
195 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
196
197 &gen6_vs_push_constants, /* Before vs_state */
198 &gen6_gs_push_constants, /* Before gs_state */
199 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
200
201 /* Surface state setup. Must come before the VS/WM unit. The binding
202 * table upload must be last.
203 */
204 &brw_vs_pull_constants,
205 &brw_vs_ubo_surfaces,
206 &brw_vs_abo_surfaces,
207 &brw_gs_pull_constants,
208 &brw_gs_ubo_surfaces,
209 &brw_gs_abo_surfaces,
210 &brw_wm_pull_constants,
211 &brw_wm_ubo_surfaces,
212 &brw_wm_abo_surfaces,
213 &gen6_renderbuffer_surfaces,
214 &brw_texture_surfaces,
215 &brw_vs_binding_table,
216 &brw_gs_binding_table,
217 &brw_wm_binding_table,
218
219 &brw_fs_samplers,
220 &brw_vs_samplers,
221 &brw_gs_samplers,
222 &gen6_multisample_state,
223
224 &gen7_disable_stages,
225 &gen7_vs_state,
226 &gen7_gs_state,
227 &gen7_sol_state,
228 &gen7_clip_state,
229 &gen7_sbe_state,
230 &gen7_sf_state,
231 &gen7_wm_state,
232 &gen7_ps_state,
233
234 &gen6_scissor_state,
235
236 &gen7_depthbuffer,
237
238 &brw_polygon_stipple,
239 &brw_polygon_stipple_offset,
240
241 &brw_line_stipple,
242 &brw_aa_line_parameters,
243
244 &brw_drawing_rect,
245
246 &brw_indices, /* must come before brw_vertices */
247 &brw_index_buffer,
248 &brw_vertices,
249
250 &haswell_cut_index,
251 };
252
253 static const struct brw_tracked_state *gen7_compute_atoms[] =
254 {
255 &brw_state_base_address,
256 &brw_cs_abo_surfaces,
257 &brw_cs_state,
258 };
259
260 static const struct brw_tracked_state *gen8_render_atoms[] =
261 {
262 /* Command packets: */
263 &gen8_state_base_address,
264
265 &brw_cc_vp,
266 &gen8_sf_clip_viewport,
267
268 &gen7_push_constant_space,
269 &gen7_urb,
270 &gen8_blend_state,
271 &gen6_color_calc_state,
272
273 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
274
275 &gen6_vs_push_constants, /* Before vs_state */
276 &gen6_gs_push_constants, /* Before gs_state */
277 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
278
279 /* Surface state setup. Must come before the VS/WM unit. The binding
280 * table upload must be last.
281 */
282 &brw_vs_pull_constants,
283 &brw_vs_ubo_surfaces,
284 &brw_vs_abo_surfaces,
285 &brw_gs_pull_constants,
286 &brw_gs_ubo_surfaces,
287 &brw_gs_abo_surfaces,
288 &brw_wm_pull_constants,
289 &brw_wm_ubo_surfaces,
290 &brw_wm_abo_surfaces,
291 &gen6_renderbuffer_surfaces,
292 &brw_texture_surfaces,
293 &brw_vs_binding_table,
294 &brw_gs_binding_table,
295 &brw_wm_binding_table,
296
297 &brw_fs_samplers,
298 &brw_vs_samplers,
299 &brw_gs_samplers,
300 &gen8_multisample_state,
301
302 &gen8_disable_stages,
303 &gen8_vs_state,
304 &gen8_gs_state,
305 &gen8_sol_state,
306 &gen6_clip_state,
307 &gen8_raster_state,
308 &gen8_sbe_state,
309 &gen8_sf_state,
310 &gen8_ps_blend,
311 &gen8_ps_extra,
312 &gen8_ps_state,
313 &gen8_wm_depth_stencil,
314 &gen8_wm_state,
315
316 &gen6_scissor_state,
317
318 &gen7_depthbuffer,
319
320 &brw_polygon_stipple,
321 &brw_polygon_stipple_offset,
322
323 &brw_line_stipple,
324 &brw_aa_line_parameters,
325
326 &brw_drawing_rect,
327
328 &gen8_vf_topology,
329
330 &brw_indices,
331 &gen8_index_buffer,
332 &gen8_vertices,
333
334 &haswell_cut_index,
335 &gen8_pma_fix,
336 };
337
338 static const struct brw_tracked_state *gen8_compute_atoms[] =
339 {
340 &gen8_state_base_address,
341 &brw_cs_abo_surfaces,
342 &brw_cs_state,
343 };
344
345 static void
346 brw_upload_initial_gpu_state(struct brw_context *brw)
347 {
348 /* On platforms with hardware contexts, we can set our initial GPU state
349 * right away rather than doing it via state atoms. This saves a small
350 * amount of overhead on every draw call.
351 */
352 if (!brw->hw_ctx)
353 return;
354
355 if (brw->gen == 6)
356 brw_emit_post_sync_nonzero_flush(brw);
357
358 brw_upload_invariant_state(brw);
359
360 /* Recommended optimization for Victim Cache eviction in pixel backend. */
361 if (brw->gen >= 9) {
362 BEGIN_BATCH(3);
363 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
364 OUT_BATCH(GEN7_CACHE_MODE_1);
365 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
366 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
367 ADVANCE_BATCH();
368 }
369
370 if (brw->gen >= 8) {
371 gen8_emit_3dstate_sample_pattern(brw);
372 }
373 }
374
375 static inline const struct brw_tracked_state *
376 brw_get_pipeline_atoms(struct brw_context *brw,
377 enum brw_pipeline pipeline)
378 {
379 switch (pipeline) {
380 case BRW_RENDER_PIPELINE:
381 return brw->render_atoms;
382 case BRW_COMPUTE_PIPELINE:
383 return brw->compute_atoms;
384 default:
385 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
386 unreachable("Unsupported pipeline");
387 return NULL;
388 }
389 }
390
391 static void
392 brw_copy_pipeline_atoms(struct brw_context *brw,
393 enum brw_pipeline pipeline,
394 const struct brw_tracked_state **atoms,
395 int num_atoms)
396 {
397 /* This is to work around brw_context::atoms being declared const. We want
398 * it to be const, but it needs to be initialized somehow!
399 */
400 struct brw_tracked_state *context_atoms =
401 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
402
403 for (int i = 0; i < num_atoms; i++) {
404 context_atoms[i] = *atoms[i];
405 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
406 assert(context_atoms[i].emit);
407 }
408
409 brw->num_atoms[pipeline] = num_atoms;
410 }
411
412 void brw_init_state( struct brw_context *brw )
413 {
414 struct gl_context *ctx = &brw->ctx;
415
416 /* Force the first brw_select_pipeline to emit pipeline select */
417 brw->last_pipeline = BRW_NUM_PIPELINES;
418
419 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
420 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
421 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
422 ARRAY_SIZE(brw->render_atoms));
423 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
424 ARRAY_SIZE(brw->render_atoms));
425 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
426 ARRAY_SIZE(brw->compute_atoms));
427 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
428 ARRAY_SIZE(brw->compute_atoms));
429
430 brw_init_caches(brw);
431
432 if (brw->gen >= 8) {
433 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
434 gen8_render_atoms,
435 ARRAY_SIZE(gen8_render_atoms));
436 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
437 gen8_compute_atoms,
438 ARRAY_SIZE(gen8_compute_atoms));
439 } else if (brw->gen == 7) {
440 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
441 gen7_render_atoms,
442 ARRAY_SIZE(gen7_render_atoms));
443 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
444 gen7_compute_atoms,
445 ARRAY_SIZE(gen7_compute_atoms));
446 } else if (brw->gen == 6) {
447 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
448 gen6_atoms, ARRAY_SIZE(gen6_atoms));
449 } else {
450 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
451 gen4_atoms, ARRAY_SIZE(gen4_atoms));
452 }
453
454 brw_upload_initial_gpu_state(brw);
455
456 brw->NewGLState = ~0;
457 brw->ctx.NewDriverState = ~0ull;
458
459 /* ~0 is a nonsensical value which won't match anything we program, so
460 * the programming will take effect on the first time around.
461 */
462 brw->pma_stall_bits = ~0;
463
464 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
465 * dirty flags.
466 */
467 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
468
469 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
470 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
471 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
472 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
473 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
474 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
475 }
476
477
/** Tear down what brw_init_state() set up; currently just the caches. */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
482
483 /***********************************************************************
484 */
485
486 static bool
487 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
488 {
489 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
490 }
491
492 static void accumulate_state( struct brw_state_flags *a,
493 const struct brw_state_flags *b )
494 {
495 a->mesa |= b->mesa;
496 a->brw |= b->brw;
497 }
498
499
500 static void xor_states( struct brw_state_flags *result,
501 const struct brw_state_flags *a,
502 const struct brw_state_flags *b )
503 {
504 result->mesa = a->mesa ^ b->mesa;
505 result->brw = a->brw ^ b->brw;
506 }
507
/* One row of the dirty-bit statistics tables: a flag value, its printable
 * name and the number of uploads in which it was seen set.
 */
struct dirty_bit_map {
   uint64_t bit;       /* flag value; 0 marks the end of a table */
   const char *name;   /* points at a string literal, hence const */
   uint32_t count;     /* bumped by brw_update_dirty_count() */
};

/* Build a table row from a flag macro, using the macro's spelling as name. */
#define DEFINE_BIT(name) {name, #name, 0}
515
516 static struct dirty_bit_map mesa_bits[] = {
517 DEFINE_BIT(_NEW_MODELVIEW),
518 DEFINE_BIT(_NEW_PROJECTION),
519 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
520 DEFINE_BIT(_NEW_COLOR),
521 DEFINE_BIT(_NEW_DEPTH),
522 DEFINE_BIT(_NEW_EVAL),
523 DEFINE_BIT(_NEW_FOG),
524 DEFINE_BIT(_NEW_HINT),
525 DEFINE_BIT(_NEW_LIGHT),
526 DEFINE_BIT(_NEW_LINE),
527 DEFINE_BIT(_NEW_PIXEL),
528 DEFINE_BIT(_NEW_POINT),
529 DEFINE_BIT(_NEW_POLYGON),
530 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
531 DEFINE_BIT(_NEW_SCISSOR),
532 DEFINE_BIT(_NEW_STENCIL),
533 DEFINE_BIT(_NEW_TEXTURE),
534 DEFINE_BIT(_NEW_TRANSFORM),
535 DEFINE_BIT(_NEW_VIEWPORT),
536 DEFINE_BIT(_NEW_ARRAY),
537 DEFINE_BIT(_NEW_RENDERMODE),
538 DEFINE_BIT(_NEW_BUFFERS),
539 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
540 DEFINE_BIT(_NEW_MULTISAMPLE),
541 DEFINE_BIT(_NEW_TRACK_MATRIX),
542 DEFINE_BIT(_NEW_PROGRAM),
543 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
544 DEFINE_BIT(_NEW_BUFFER_OBJECT),
545 DEFINE_BIT(_NEW_FRAG_CLAMP),
546 /* Avoid sign extension problems. */
547 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
548 {0, 0, 0}
549 };
550
551 static struct dirty_bit_map brw_bits[] = {
552 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
553 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
554 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
555 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
556 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
557 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
558 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
559 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
560 DEFINE_BIT(BRW_NEW_URB_FENCE),
561 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
562 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
563 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
564 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
565 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
566 DEFINE_BIT(BRW_NEW_PRIMITIVE),
567 DEFINE_BIT(BRW_NEW_CONTEXT),
568 DEFINE_BIT(BRW_NEW_PSP),
569 DEFINE_BIT(BRW_NEW_SURFACES),
570 DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
571 DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
572 DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
573 DEFINE_BIT(BRW_NEW_INDICES),
574 DEFINE_BIT(BRW_NEW_VERTICES),
575 DEFINE_BIT(BRW_NEW_BATCH),
576 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
577 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
578 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
579 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
580 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
581 DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
582 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
583 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
584 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
585 DEFINE_BIT(BRW_NEW_STATS_WM),
586 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
587 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
588 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
589 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
590 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
591 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
592 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
593 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
594 DEFINE_BIT(BRW_NEW_CC_VP),
595 DEFINE_BIT(BRW_NEW_SF_VP),
596 DEFINE_BIT(BRW_NEW_CLIP_VP),
597 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
598 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
599 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
600 {0, 0, 0}
601 };
602
603 static void
604 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
605 {
606 for (int i = 0; bit_map[i].bit != 0; i++) {
607 if (bit_map[i].bit & bits)
608 bit_map[i].count++;
609 }
610 }
611
612 static void
613 brw_print_dirty_count(struct dirty_bit_map *bit_map)
614 {
615 for (int i = 0; bit_map[i].bit != 0; i++) {
616 if (bit_map[i].count > 1) {
617 fprintf(stderr, "0x%016lx: %12d (%s)\n",
618 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
619 }
620 }
621 }
622
623 static inline void
624 brw_upload_programs(struct brw_context *brw,
625 enum brw_pipeline pipeline)
626 {
627 if (pipeline == BRW_RENDER_PIPELINE) {
628 brw_upload_vs_prog(brw);
629
630 if (brw->gen < 6)
631 brw_upload_ff_gs_prog(brw);
632 else
633 brw_upload_gs_prog(brw);
634
635 brw_upload_wm_prog(brw);
636 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
637 brw_upload_cs_prog(brw);
638 }
639 }
640
641 static inline void
642 merge_ctx_state(struct brw_context *brw,
643 struct brw_state_flags *state)
644 {
645 state->mesa |= brw->NewGLState;
646 state->brw |= brw->ctx.NewDriverState;
647 }
648
649 static inline void
650 check_and_emit_atom(struct brw_context *brw,
651 struct brw_state_flags *state,
652 const struct brw_tracked_state *atom)
653 {
654 if (check_state(state, &atom->dirty)) {
655 atom->emit(brw);
656 merge_ctx_state(brw, state);
657 }
658 }
659
660 static inline void
661 brw_upload_pipeline_state(struct brw_context *brw,
662 enum brw_pipeline pipeline)
663 {
664 struct gl_context *ctx = &brw->ctx;
665 int i;
666 static int dirty_count = 0;
667 struct brw_state_flags state = brw->state.pipelines[pipeline];
668 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
669
670 brw_select_pipeline(brw, pipeline);
671
672 if (0) {
673 /* Always re-emit all state. */
674 brw->NewGLState = ~0;
675 ctx->NewDriverState = ~0ull;
676 }
677
678 if (pipeline == BRW_RENDER_PIPELINE) {
679 if (brw->fragment_program != ctx->FragmentProgram._Current) {
680 brw->fragment_program = ctx->FragmentProgram._Current;
681 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
682 }
683
684 if (brw->geometry_program != ctx->GeometryProgram._Current) {
685 brw->geometry_program = ctx->GeometryProgram._Current;
686 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
687 }
688
689 if (brw->vertex_program != ctx->VertexProgram._Current) {
690 brw->vertex_program = ctx->VertexProgram._Current;
691 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
692 }
693 }
694
695 if (brw->compute_program != ctx->ComputeProgram._Current) {
696 brw->compute_program = ctx->ComputeProgram._Current;
697 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
698 }
699
700 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
701 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
702 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
703 }
704
705 if (brw->num_samples != fb_samples) {
706 brw->num_samples = fb_samples;
707 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
708 }
709
710 /* Exit early if no state is flagged as dirty */
711 merge_ctx_state(brw, &state);
712 if ((state.mesa | state.brw) == 0)
713 return;
714
715 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
716 if (brw->gen == 6)
717 brw_emit_post_sync_nonzero_flush(brw);
718
719 brw_upload_programs(brw, pipeline);
720 merge_ctx_state(brw, &state);
721
722 const struct brw_tracked_state *atoms =
723 brw_get_pipeline_atoms(brw, pipeline);
724 const int num_atoms = brw->num_atoms[pipeline];
725
726 if (unlikely(INTEL_DEBUG)) {
727 /* Debug version which enforces various sanity checks on the
728 * state flags which are generated and checked to help ensure
729 * state atoms are ordered correctly in the list.
730 */
731 struct brw_state_flags examined, prev;
732 memset(&examined, 0, sizeof(examined));
733 prev = state;
734
735 for (i = 0; i < num_atoms; i++) {
736 const struct brw_tracked_state *atom = &atoms[i];
737 struct brw_state_flags generated;
738
739 check_and_emit_atom(brw, &state, atom);
740
741 accumulate_state(&examined, &atom->dirty);
742
743 /* generated = (prev ^ state)
744 * if (examined & generated)
745 * fail;
746 */
747 xor_states(&generated, &prev, &state);
748 assert(!check_state(&examined, &generated));
749 prev = state;
750 }
751 }
752 else {
753 for (i = 0; i < num_atoms; i++) {
754 const struct brw_tracked_state *atom = &atoms[i];
755
756 check_and_emit_atom(brw, &state, atom);
757 }
758 }
759
760 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
761 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
762
763 brw_update_dirty_count(mesa_bits, state.mesa);
764 brw_update_dirty_count(brw_bits, state.brw);
765 if (dirty_count++ % 1000 == 0) {
766 brw_print_dirty_count(mesa_bits);
767 brw_print_dirty_count(brw_bits);
768 fprintf(stderr, "\n");
769 }
770 }
771 }
772
773 /***********************************************************************
774 * Emit all state:
775 */
776 void brw_upload_render_state(struct brw_context *brw)
777 {
778 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
779 }
780
781 static inline void
782 brw_pipeline_state_finished(struct brw_context *brw,
783 enum brw_pipeline pipeline)
784 {
785 /* Save all dirty state into the other pipelines */
786 for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
787 if (i != pipeline) {
788 brw->state.pipelines[i].mesa |= brw->NewGLState;
789 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
790 } else {
791 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
792 }
793 }
794
795 brw->NewGLState = 0;
796 brw->ctx.NewDriverState = 0ull;
797 }
798
799 /**
800 * Clear dirty bits to account for the fact that the state emitted by
801 * brw_upload_render_state() has been committed to the hardware. This is a
802 * separate call from brw_upload_render_state() because it's possible that
803 * after the call to brw_upload_render_state(), we will discover that we've
804 * run out of aperture space, and need to rewind the batch buffer to the state
805 * it had before the brw_upload_render_state() call.
806 */
807 void
808 brw_render_state_finished(struct brw_context *brw)
809 {
810 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
811 }
812
813 void
814 brw_upload_compute_state(struct brw_context *brw)
815 {
816 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
817 }
818
819 void
820 brw_compute_state_finished(struct brw_context *brw)
821 {
822 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
823 }