/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include <inttypes.h> /* for PRIx64 in brw_print_dirty_count() */

#include "brw_context.h"
#include "brw_state.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

static const struct brw_tracked_state *gen4_atoms[] =
{
   &brw_interpolation_map,

   &brw_clip_prog, /* must do before state base address */
   &brw_sf_prog, /* must do before state base address */

   /* Once all the programs are done, we know how large the URB entry
    * sizes need to be and can decide if we need to change the URB
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit, /* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,
   &brw_state_base_address,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};

static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_clip_vp,
   &gen6_sf_vp,

   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen6_viewport_state, /* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};

static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   /* must do before binding table pointers, cc state ptrs */
   &brw_state_base_address,

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_disable_stages,
   &gen7_vs_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen7_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};

static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &brw_state_base_address,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_abo_surfaces,
   &brw_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_state,
};
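
/* Each atom in these lists pairs a set of dirty-bit dependencies with an
 * emit callback; brw_upload_pipeline_state() below walks the active list
 * and re-emits whichever atoms match the accumulated flags.  As an
 * illustrative sketch of the shape only (the real definition of this atom
 * lives elsewhere in the driver, and the exact dirty bits shown here are an
 * assumption for exposition, not a quote):
 */
#if 0
const struct brw_tracked_state brw_cs_work_groups_surface = {
   .dirty = {
      .brw = BRW_NEW_CS_WORK_GROUPS,
   },
   .emit = brw_upload_cs_work_groups_surface,
};
#endif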

static const struct brw_tracked_state *gen8_render_atoms[] =
{
   /* Command packets: */
   &gen8_state_base_address,

   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_disable_stages,
   &gen8_vs_state,
   &gen8_gs_state,
   &gen8_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,
   &brw_aa_line_parameters,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};

static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen8_state_base_address,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_abo_surfaces,
   &brw_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_state,
};

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);
   }
}

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

static void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We
    * want it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));

   brw_init_caches(brw);

   if (brw->gen >= 8) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen8_render_atoms,
                              ARRAY_SIZE(gen8_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen8_compute_atoms,
                              ARRAY_SIZE(gen8_compute_atoms));
   } else if (brw->gen == 7) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen7_render_atoms,
                              ARRAY_SIZE(gen7_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen7_compute_atoms,
                              ARRAY_SIZE(gen7_compute_atoms));
   } else if (brw->gen == 6) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen6_atoms, ARRAY_SIZE(gen6_atoms));
   } else {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen4_atoms, ARRAY_SIZE(gen4_atoms));
   }

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
}
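
/* Core Mesa consumes the DriverFlags values set above when the matching GL
 * state changes.  A minimal sketch of that pattern (illustrative only; the
 * real call sites live in core Mesa's buffer-binding paths, not here):
 *
 *    // on e.g. glBindBufferBase(GL_UNIFORM_BUFFER, ...):
 *    ctx->NewDriverState |= ctx->DriverFlags.NewUniformBuffer;
 */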


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

struct dirty_bit_map {
   uint64_t bit;
   const char *name; /* points at string literals; must not be written */
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}
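/* e.g. DEFINE_BIT(_NEW_COLOR) expands to { _NEW_COLOR, "_NEW_COLOR", 0 }. */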

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         /* Use PRIx64: "%lx" is the wrong format for uint64_t on 32-bit
          * builds.
          */
         fprintf(stderr, "0x%016" PRIx64 ": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);

      if (brw->gen < 6)
         brw_upload_ff_gs_prog(brw);
      else
         brw_upload_gs_prog(brw);

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      if (brw->geometry_program)
         brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
      else
         brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      brw_upload_wm_prog(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

static inline void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      /* Re-merge: emitting an atom may itself flag new driver state that
       * later atoms in the list need to pick up.
       */
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
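
/* A minimal sketch of the upload/finish split in a draw path (illustrative
 * fragment only, never compiled; the helper names used here, such as
 * intel_batchbuffer_save_state() and batch_would_overflow_aperture(), are
 * assumptions for exposition rather than a quote of the real call site):
 */
#if 0
   intel_batchbuffer_save_state(brw);
   brw_upload_render_state(brw);
   emit_the_primitive(brw);                   /* hypothetical draw emission */

   if (batch_would_overflow_aperture(brw)) {  /* hypothetical check */
      /* Rewind and retry in a fresh batch; the dirty bits stay set, so the
       * state will be re-uploaded on the next attempt.
       */
      intel_batchbuffer_reset_to_saved(brw);
      intel_batchbuffer_flush(brw);
   } else {
      brw_render_state_finished(brw);         /* commit: clear dirty bits */
   }
#endif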

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}