i965/bxt: Add basic Broxton infrastructure
src/mesa/drivers/dri/i965/brw_state_upload.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include <inttypes.h>

#include "brw_context.h"
#include "brw_state.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

static const struct brw_tracked_state *gen4_atoms[] =
{
   &brw_interpolation_map,

   &brw_clip_prog, /* must do before state base address */
   &brw_sf_prog, /* must do before state base address */

   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit, /* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,
   &brw_state_base_address,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};

static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_clip_vp,
   &gen6_sf_vp,

   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen6_viewport_state, /* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};

static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_disable_stages,
   &gen7_vs_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen7_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};

static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &brw_state_base_address,
   &brw_cs_abo_surfaces,
   &brw_cs_state,
};

static const struct brw_tracked_state *gen8_render_atoms[] =
{
   /* Command packets: */
   &gen8_state_base_address,

   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_disable_stages,
   &gen8_vs_state,
   &gen8_gs_state,
   &gen8_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};

static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen8_state_base_address,
   &brw_cs_abo_surfaces,
   &brw_cs_state,
};

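/* Emit GPU state up front that does not need to be re-emitted via state
 * atoms on every draw: invariant state, the gen9+ cache-mode tweak, and the
 * gen8+ sample pattern.  Only possible when a hardware context is available.
 */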
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);
   }
}

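/* Return the context's atom list for the given pipeline (render or compute). */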
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

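/* Copy a generation-specific atom list into the context's fixed-size array
 * for the given pipeline and record how many atoms it contains.
 */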
static void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

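/* One-time state-tracker setup: pick the atom lists for this hardware
 * generation, initialize the program caches, upload the initial GPU state,
 * and flag everything dirty so the first draw emits a full set of state.
 */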
void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));

   brw_init_caches(brw);

   if (brw->gen >= 8) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen8_render_atoms,
                              ARRAY_SIZE(gen8_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen8_compute_atoms,
                              ARRAY_SIZE(gen8_compute_atoms));
   } else if (brw->gen == 7) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen7_render_atoms,
                              ARRAY_SIZE(gen7_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen7_compute_atoms,
                              ARRAY_SIZE(gen7_compute_atoms));
   } else if (brw->gen == 6) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen6_atoms, ARRAY_SIZE(gen6_atoms));
   } else {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen4_atoms, ARRAY_SIZE(gen4_atoms));
   }

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

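/* Return true if any dirty bit set in 'a' is also set in 'b'. */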
483 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
484 {
485 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
486 }
487
488 static void accumulate_state( struct brw_state_flags *a,
489 const struct brw_state_flags *b )
490 {
491 a->mesa |= b->mesa;
492 a->brw |= b->brw;
493 }
494
495
496 static void xor_states( struct brw_state_flags *result,
497 const struct brw_state_flags *a,
498 const struct brw_state_flags *b )
499 {
500 result->mesa = a->mesa ^ b->mesa;
501 result->brw = a->brw ^ b->brw;
502 }
503
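/* Bookkeeping used under the DEBUG_STATE flag of INTEL_DEBUG to count how
 * often each dirty flag triggers state re-emission; the counts are reported
 * by brw_print_dirty_count() below.
 */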
struct dirty_bit_map {
   uint64_t bit;
   const char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016" PRIx64 ": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

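/* Compile and upload the shader programs required by the given pipeline
 * before the state atoms that depend on their prog_data are emitted.
 */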
static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);

      if (brw->gen < 6)
         brw_upload_ff_gs_prog(brw);
      else
         brw_upload_gs_prog(brw);

      brw_upload_wm_prog(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
   }
}

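/* Fold any dirty bits flagged on the context since the last merge into the
 * local set of flags being considered for this upload.
 */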
static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

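/* Emit a single atom if any of its listed dependencies are dirty, then pick
 * up whatever new dirty bits the emit function itself may have set.
 */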
static inline void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

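/* Upload all dirty state for the given pipeline: update the tracked program
 * pointers, compile/upload the programs, then walk the pipeline's atom list
 * and emit every atom whose dependencies are flagged dirty.
 */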
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

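/* Mark the just-uploaded pipeline's state as clean, and carry the dirty bits
 * over to the other pipelines, which have not yet seen these changes.
 */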
static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

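/* Emit all dirty state for the compute pipeline. */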
void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

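/* Compute-pipeline counterpart of brw_render_state_finished(). */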
void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}