i965: Hook up image state upload.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_push_constant_space,
190 &gen7_urb,
191 &gen6_blend_state, /* must do before cc unit */
192 &gen6_color_calc_state, /* must do before cc unit */
193 &gen6_depth_stencil_state, /* must do before cc unit */
194
195 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
196
197 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
198 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
199 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
200
201 &gen6_vs_push_constants, /* Before vs_state */
202 &gen6_gs_push_constants, /* Before gs_state */
203 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
204
205 /* Surface state setup. Must come before the VS/WM unit. The binding
206 * table upload must be last.
207 */
208 &brw_vs_pull_constants,
209 &brw_vs_ubo_surfaces,
210 &brw_vs_abo_surfaces,
211 &brw_gs_pull_constants,
212 &brw_gs_ubo_surfaces,
213 &brw_gs_abo_surfaces,
214 &brw_wm_pull_constants,
215 &brw_wm_ubo_surfaces,
216 &brw_wm_abo_surfaces,
217 &gen6_renderbuffer_surfaces,
218 &brw_texture_surfaces,
219 &brw_vs_binding_table,
220 &brw_gs_binding_table,
221 &brw_wm_binding_table,
222
223 &brw_fs_samplers,
224 &brw_vs_samplers,
225 &brw_gs_samplers,
226 &gen6_multisample_state,
227
228 &gen7_disable_stages,
229 &gen7_vs_state,
230 &gen7_gs_state,
231 &gen7_sol_state,
232 &gen7_clip_state,
233 &gen7_sbe_state,
234 &gen7_sf_state,
235 &gen7_wm_state,
236 &gen7_ps_state,
237
238 &gen6_scissor_state,
239
240 &gen7_depthbuffer,
241
242 &brw_polygon_stipple,
243 &brw_polygon_stipple_offset,
244
245 &brw_line_stipple,
246 &brw_aa_line_parameters,
247
248 &brw_drawing_rect,
249
250 &brw_indices, /* must come before brw_vertices */
251 &brw_index_buffer,
252 &brw_vertices,
253
254 &haswell_cut_index,
255 };
256
257 static const struct brw_tracked_state *gen7_compute_atoms[] =
258 {
259 &brw_state_base_address,
260 &brw_cs_image_surfaces,
261 &brw_cs_abo_surfaces,
262 &brw_cs_state,
263 };
264
265 static const struct brw_tracked_state *gen8_render_atoms[] =
266 {
267 /* Command packets: */
268 &gen8_state_base_address,
269
270 &brw_cc_vp,
271 &gen8_sf_clip_viewport,
272
273 &gen7_push_constant_space,
274 &gen7_urb,
275 &gen8_blend_state,
276 &gen6_color_calc_state,
277
278 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
279
280 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
281 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
282 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
283
284 &gen6_vs_push_constants, /* Before vs_state */
285 &gen6_gs_push_constants, /* Before gs_state */
286 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
287
288 /* Surface state setup. Must come before the VS/WM unit. The binding
289 * table upload must be last.
290 */
291 &brw_vs_pull_constants,
292 &brw_vs_ubo_surfaces,
293 &brw_vs_abo_surfaces,
294 &brw_gs_pull_constants,
295 &brw_gs_ubo_surfaces,
296 &brw_gs_abo_surfaces,
297 &brw_wm_pull_constants,
298 &brw_wm_ubo_surfaces,
299 &brw_wm_abo_surfaces,
300 &gen6_renderbuffer_surfaces,
301 &brw_texture_surfaces,
302 &brw_vs_binding_table,
303 &brw_gs_binding_table,
304 &brw_wm_binding_table,
305
306 &brw_fs_samplers,
307 &brw_vs_samplers,
308 &brw_gs_samplers,
309 &gen8_multisample_state,
310
311 &gen8_disable_stages,
312 &gen8_vs_state,
313 &gen8_gs_state,
314 &gen8_sol_state,
315 &gen6_clip_state,
316 &gen8_raster_state,
317 &gen8_sbe_state,
318 &gen8_sf_state,
319 &gen8_ps_blend,
320 &gen8_ps_extra,
321 &gen8_ps_state,
322 &gen8_wm_depth_stencil,
323 &gen8_wm_state,
324
325 &gen6_scissor_state,
326
327 &gen7_depthbuffer,
328
329 &brw_polygon_stipple,
330 &brw_polygon_stipple_offset,
331
332 &brw_line_stipple,
333 &brw_aa_line_parameters,
334
335 &brw_drawing_rect,
336
337 &gen8_vf_topology,
338
339 &brw_indices,
340 &gen8_index_buffer,
341 &gen8_vertices,
342
343 &haswell_cut_index,
344 &gen8_pma_fix,
345 };
346
347 static const struct brw_tracked_state *gen8_compute_atoms[] =
348 {
349 &gen8_state_base_address,
350 &brw_cs_image_surfaces,
351 &brw_cs_abo_surfaces,
352 &brw_cs_state,
353 };
354
355 static void
356 brw_upload_initial_gpu_state(struct brw_context *brw)
357 {
358 /* On platforms with hardware contexts, we can set our initial GPU state
359 * right away rather than doing it via state atoms. This saves a small
360 * amount of overhead on every draw call.
361 */
362 if (!brw->hw_ctx)
363 return;
364
365 if (brw->gen == 6)
366 brw_emit_post_sync_nonzero_flush(brw);
367
368 brw_upload_invariant_state(brw);
369
370 /* Recommended optimization for Victim Cache eviction in pixel backend. */
371 if (brw->gen >= 9) {
372 BEGIN_BATCH(3);
373 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
374 OUT_BATCH(GEN7_CACHE_MODE_1);
375 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
376 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
377 ADVANCE_BATCH();
378 }
379
380 if (brw->gen >= 8) {
381 gen8_emit_3dstate_sample_pattern(brw);
382 }
383 }
384
385 static inline const struct brw_tracked_state *
386 brw_get_pipeline_atoms(struct brw_context *brw,
387 enum brw_pipeline pipeline)
388 {
389 switch (pipeline) {
390 case BRW_RENDER_PIPELINE:
391 return brw->render_atoms;
392 case BRW_COMPUTE_PIPELINE:
393 return brw->compute_atoms;
394 default:
395 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
396 unreachable("Unsupported pipeline");
397 return NULL;
398 }
399 }
400
401 static void
402 brw_copy_pipeline_atoms(struct brw_context *brw,
403 enum brw_pipeline pipeline,
404 const struct brw_tracked_state **atoms,
405 int num_atoms)
406 {
407 /* This is to work around brw_context::atoms being declared const. We want
408 * it to be const, but it needs to be initialized somehow!
409 */
410 struct brw_tracked_state *context_atoms =
411 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
412
413 for (int i = 0; i < num_atoms; i++) {
414 context_atoms[i] = *atoms[i];
415 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
416 assert(context_atoms[i].emit);
417 }
418
419 brw->num_atoms[pipeline] = num_atoms;
420 }
421
422 void brw_init_state( struct brw_context *brw )
423 {
424 struct gl_context *ctx = &brw->ctx;
425
426 /* Force the first brw_select_pipeline to emit pipeline select */
427 brw->last_pipeline = BRW_NUM_PIPELINES;
428
429 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
430 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
431 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
432 ARRAY_SIZE(brw->render_atoms));
433 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
434 ARRAY_SIZE(brw->render_atoms));
435 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
436 ARRAY_SIZE(brw->compute_atoms));
437 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
438 ARRAY_SIZE(brw->compute_atoms));
439
440 brw_init_caches(brw);
441
442 if (brw->gen >= 8) {
443 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
444 gen8_render_atoms,
445 ARRAY_SIZE(gen8_render_atoms));
446 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
447 gen8_compute_atoms,
448 ARRAY_SIZE(gen8_compute_atoms));
449 } else if (brw->gen == 7) {
450 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
451 gen7_render_atoms,
452 ARRAY_SIZE(gen7_render_atoms));
453 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
454 gen7_compute_atoms,
455 ARRAY_SIZE(gen7_compute_atoms));
456 } else if (brw->gen == 6) {
457 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
458 gen6_atoms, ARRAY_SIZE(gen6_atoms));
459 } else {
460 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
461 gen4_atoms, ARRAY_SIZE(gen4_atoms));
462 }
463
464 brw_upload_initial_gpu_state(brw);
465
466 brw->NewGLState = ~0;
467 brw->ctx.NewDriverState = ~0ull;
468
469 /* ~0 is a nonsensical value which won't match anything we program, so
470 * the programming will take effect on the first time around.
471 */
472 brw->pma_stall_bits = ~0;
473
474 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
475 * dirty flags.
476 */
477 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
478
479 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
480 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
481 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
482 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
483 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
484 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
485 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
486 }
487
488
/**
 * Tear down the state set up by brw_init_state(); currently this only
 * destroys the caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
493
494 /***********************************************************************
495 */
496
497 static bool
498 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
499 {
500 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
501 }
502
503 static void accumulate_state( struct brw_state_flags *a,
504 const struct brw_state_flags *b )
505 {
506 a->mesa |= b->mesa;
507 a->brw |= b->brw;
508 }
509
510
511 static void xor_states( struct brw_state_flags *result,
512 const struct brw_state_flags *a,
513 const struct brw_state_flags *b )
514 {
515 result->mesa = a->mesa ^ b->mesa;
516 result->brw = a->brw ^ b->brw;
517 }
518
/**
 * One row of a dirty-flag statistics table.
 *
 * A table is an array of these terminated by an entry whose bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;       /* flag value this row tracks */
   const char *name;   /* flag name; always a string literal, hence const */
   uint32_t count;     /* number of times the flag was seen set */
};

/* Build a table row from a flag macro: its value, its spelling, count 0. */
#define DEFINE_BIT(name) {name, #name, 0}
526
527 static struct dirty_bit_map mesa_bits[] = {
528 DEFINE_BIT(_NEW_MODELVIEW),
529 DEFINE_BIT(_NEW_PROJECTION),
530 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
531 DEFINE_BIT(_NEW_COLOR),
532 DEFINE_BIT(_NEW_DEPTH),
533 DEFINE_BIT(_NEW_EVAL),
534 DEFINE_BIT(_NEW_FOG),
535 DEFINE_BIT(_NEW_HINT),
536 DEFINE_BIT(_NEW_LIGHT),
537 DEFINE_BIT(_NEW_LINE),
538 DEFINE_BIT(_NEW_PIXEL),
539 DEFINE_BIT(_NEW_POINT),
540 DEFINE_BIT(_NEW_POLYGON),
541 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
542 DEFINE_BIT(_NEW_SCISSOR),
543 DEFINE_BIT(_NEW_STENCIL),
544 DEFINE_BIT(_NEW_TEXTURE),
545 DEFINE_BIT(_NEW_TRANSFORM),
546 DEFINE_BIT(_NEW_VIEWPORT),
547 DEFINE_BIT(_NEW_ARRAY),
548 DEFINE_BIT(_NEW_RENDERMODE),
549 DEFINE_BIT(_NEW_BUFFERS),
550 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
551 DEFINE_BIT(_NEW_MULTISAMPLE),
552 DEFINE_BIT(_NEW_TRACK_MATRIX),
553 DEFINE_BIT(_NEW_PROGRAM),
554 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
555 DEFINE_BIT(_NEW_BUFFER_OBJECT),
556 DEFINE_BIT(_NEW_FRAG_CLAMP),
557 /* Avoid sign extension problems. */
558 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
559 {0, 0, 0}
560 };
561
562 static struct dirty_bit_map brw_bits[] = {
563 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
564 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
565 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
566 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
567 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
568 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
569 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
570 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
571 DEFINE_BIT(BRW_NEW_URB_FENCE),
572 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
573 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
574 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
575 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
576 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
577 DEFINE_BIT(BRW_NEW_PRIMITIVE),
578 DEFINE_BIT(BRW_NEW_CONTEXT),
579 DEFINE_BIT(BRW_NEW_PSP),
580 DEFINE_BIT(BRW_NEW_SURFACES),
581 DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
582 DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
583 DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
584 DEFINE_BIT(BRW_NEW_INDICES),
585 DEFINE_BIT(BRW_NEW_VERTICES),
586 DEFINE_BIT(BRW_NEW_BATCH),
587 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
588 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
589 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
590 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
591 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
592 DEFINE_BIT(BRW_NEW_VUE_MAP_VS),
593 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
594 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
595 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
596 DEFINE_BIT(BRW_NEW_STATS_WM),
597 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
598 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
599 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
600 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
601 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
602 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
603 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
604 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
605 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
606 DEFINE_BIT(BRW_NEW_CC_VP),
607 DEFINE_BIT(BRW_NEW_SF_VP),
608 DEFINE_BIT(BRW_NEW_CLIP_VP),
609 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
610 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
611 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
612 {0, 0, 0}
613 };
614
615 static void
616 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
617 {
618 for (int i = 0; bit_map[i].bit != 0; i++) {
619 if (bit_map[i].bit & bits)
620 bit_map[i].count++;
621 }
622 }
623
624 static void
625 brw_print_dirty_count(struct dirty_bit_map *bit_map)
626 {
627 for (int i = 0; bit_map[i].bit != 0; i++) {
628 if (bit_map[i].count > 1) {
629 fprintf(stderr, "0x%016lx: %12d (%s)\n",
630 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
631 }
632 }
633 }
634
635 static inline void
636 brw_upload_programs(struct brw_context *brw,
637 enum brw_pipeline pipeline)
638 {
639 if (pipeline == BRW_RENDER_PIPELINE) {
640 brw_upload_vs_prog(brw);
641
642 if (brw->gen < 6)
643 brw_upload_ff_gs_prog(brw);
644 else
645 brw_upload_gs_prog(brw);
646
647 brw_upload_wm_prog(brw);
648 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
649 brw_upload_cs_prog(brw);
650 }
651 }
652
653 static inline void
654 merge_ctx_state(struct brw_context *brw,
655 struct brw_state_flags *state)
656 {
657 state->mesa |= brw->NewGLState;
658 state->brw |= brw->ctx.NewDriverState;
659 }
660
661 static inline void
662 check_and_emit_atom(struct brw_context *brw,
663 struct brw_state_flags *state,
664 const struct brw_tracked_state *atom)
665 {
666 if (check_state(state, &atom->dirty)) {
667 atom->emit(brw);
668 merge_ctx_state(brw, state);
669 }
670 }
671
672 static inline void
673 brw_upload_pipeline_state(struct brw_context *brw,
674 enum brw_pipeline pipeline)
675 {
676 struct gl_context *ctx = &brw->ctx;
677 int i;
678 static int dirty_count = 0;
679 struct brw_state_flags state = brw->state.pipelines[pipeline];
680 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
681
682 brw_select_pipeline(brw, pipeline);
683
684 if (0) {
685 /* Always re-emit all state. */
686 brw->NewGLState = ~0;
687 ctx->NewDriverState = ~0ull;
688 }
689
690 if (pipeline == BRW_RENDER_PIPELINE) {
691 if (brw->fragment_program != ctx->FragmentProgram._Current) {
692 brw->fragment_program = ctx->FragmentProgram._Current;
693 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
694 }
695
696 if (brw->geometry_program != ctx->GeometryProgram._Current) {
697 brw->geometry_program = ctx->GeometryProgram._Current;
698 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
699 }
700
701 if (brw->vertex_program != ctx->VertexProgram._Current) {
702 brw->vertex_program = ctx->VertexProgram._Current;
703 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
704 }
705 }
706
707 if (brw->compute_program != ctx->ComputeProgram._Current) {
708 brw->compute_program = ctx->ComputeProgram._Current;
709 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
710 }
711
712 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
713 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
714 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
715 }
716
717 if (brw->num_samples != fb_samples) {
718 brw->num_samples = fb_samples;
719 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
720 }
721
722 /* Exit early if no state is flagged as dirty */
723 merge_ctx_state(brw, &state);
724 if ((state.mesa | state.brw) == 0)
725 return;
726
727 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
728 if (brw->gen == 6)
729 brw_emit_post_sync_nonzero_flush(brw);
730
731 brw_upload_programs(brw, pipeline);
732 merge_ctx_state(brw, &state);
733
734 const struct brw_tracked_state *atoms =
735 brw_get_pipeline_atoms(brw, pipeline);
736 const int num_atoms = brw->num_atoms[pipeline];
737
738 if (unlikely(INTEL_DEBUG)) {
739 /* Debug version which enforces various sanity checks on the
740 * state flags which are generated and checked to help ensure
741 * state atoms are ordered correctly in the list.
742 */
743 struct brw_state_flags examined, prev;
744 memset(&examined, 0, sizeof(examined));
745 prev = state;
746
747 for (i = 0; i < num_atoms; i++) {
748 const struct brw_tracked_state *atom = &atoms[i];
749 struct brw_state_flags generated;
750
751 check_and_emit_atom(brw, &state, atom);
752
753 accumulate_state(&examined, &atom->dirty);
754
755 /* generated = (prev ^ state)
756 * if (examined & generated)
757 * fail;
758 */
759 xor_states(&generated, &prev, &state);
760 assert(!check_state(&examined, &generated));
761 prev = state;
762 }
763 }
764 else {
765 for (i = 0; i < num_atoms; i++) {
766 const struct brw_tracked_state *atom = &atoms[i];
767
768 check_and_emit_atom(brw, &state, atom);
769 }
770 }
771
772 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
773 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
774
775 brw_update_dirty_count(mesa_bits, state.mesa);
776 brw_update_dirty_count(brw_bits, state.brw);
777 if (dirty_count++ % 1000 == 0) {
778 brw_print_dirty_count(mesa_bits);
779 brw_print_dirty_count(brw_bits);
780 fprintf(stderr, "\n");
781 }
782 }
783 }
784
785 /***********************************************************************
786 * Emit all state:
787 */
788 void brw_upload_render_state(struct brw_context *brw)
789 {
790 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
791 }
792
793 static inline void
794 brw_pipeline_state_finished(struct brw_context *brw,
795 enum brw_pipeline pipeline)
796 {
797 /* Save all dirty state into the other pipelines */
798 for (int i = 0; i < BRW_NUM_PIPELINES; i++) {
799 if (i != pipeline) {
800 brw->state.pipelines[i].mesa |= brw->NewGLState;
801 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
802 } else {
803 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
804 }
805 }
806
807 brw->NewGLState = 0;
808 brw->ctx.NewDriverState = 0ull;
809 }
810
811 /**
812 * Clear dirty bits to account for the fact that the state emitted by
813 * brw_upload_render_state() has been committed to the hardware. This is a
814 * separate call from brw_upload_render_state() because it's possible that
815 * after the call to brw_upload_render_state(), we will discover that we've
816 * run out of aperture space, and need to rewind the batch buffer to the state
817 * it had before the brw_upload_render_state() call.
818 */
819 void
820 brw_render_state_finished(struct brw_context *brw)
821 {
822 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
823 }
824
825 void
826 brw_upload_compute_state(struct brw_context *brw)
827 {
828 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
829 }
830
831 void
832 brw_compute_state_finished(struct brw_context *brw)
833 {
834 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
835 }