i965: Setup pull constant state for compute programs
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
#include <inttypes.h>

#include "brw_context.h"
#include "brw_state.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_push_constant_space,
190 &gen7_urb,
191 &gen6_blend_state, /* must do before cc unit */
192 &gen6_color_calc_state, /* must do before cc unit */
193 &gen6_depth_stencil_state, /* must do before cc unit */
194
195 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
196
197 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
198 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
199 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
200
201 &gen6_vs_push_constants, /* Before vs_state */
202 &gen6_gs_push_constants, /* Before gs_state */
203 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
204
205 /* Surface state setup. Must come before the VS/WM unit. The binding
206 * table upload must be last.
207 */
208 &brw_vs_pull_constants,
209 &brw_vs_ubo_surfaces,
210 &brw_vs_abo_surfaces,
211 &brw_gs_pull_constants,
212 &brw_gs_ubo_surfaces,
213 &brw_gs_abo_surfaces,
214 &brw_wm_pull_constants,
215 &brw_wm_ubo_surfaces,
216 &brw_wm_abo_surfaces,
217 &gen6_renderbuffer_surfaces,
218 &brw_texture_surfaces,
219 &brw_vs_binding_table,
220 &brw_gs_binding_table,
221 &brw_wm_binding_table,
222
223 &brw_fs_samplers,
224 &brw_vs_samplers,
225 &brw_gs_samplers,
226 &gen6_multisample_state,
227
228 &gen7_disable_stages,
229 &gen7_vs_state,
230 &gen7_gs_state,
231 &gen7_sol_state,
232 &gen7_clip_state,
233 &gen7_sbe_state,
234 &gen7_sf_state,
235 &gen7_wm_state,
236 &gen7_ps_state,
237
238 &gen6_scissor_state,
239
240 &gen7_depthbuffer,
241
242 &brw_polygon_stipple,
243 &brw_polygon_stipple_offset,
244
245 &brw_line_stipple,
246 &brw_aa_line_parameters,
247
248 &brw_drawing_rect,
249
250 &brw_indices, /* must come before brw_vertices */
251 &brw_index_buffer,
252 &brw_vertices,
253
254 &haswell_cut_index,
255 };
256
257 static const struct brw_tracked_state *gen7_compute_atoms[] =
258 {
259 &brw_state_base_address,
260 &brw_cs_image_surfaces,
261 &gen7_cs_push_constants,
262 &brw_cs_pull_constants,
263 &brw_cs_ubo_surfaces,
264 &brw_cs_abo_surfaces,
265 &brw_texture_surfaces,
266 &brw_cs_work_groups_surface,
267 &brw_cs_state,
268 };
269
270 static const struct brw_tracked_state *gen8_render_atoms[] =
271 {
272 /* Command packets: */
273 &gen8_state_base_address,
274
275 &brw_cc_vp,
276 &gen8_sf_clip_viewport,
277
278 &gen7_push_constant_space,
279 &gen7_urb,
280 &gen8_blend_state,
281 &gen6_color_calc_state,
282
283 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
284
285 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
286 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
287 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
288
289 &gen6_vs_push_constants, /* Before vs_state */
290 &gen6_gs_push_constants, /* Before gs_state */
291 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
292
293 /* Surface state setup. Must come before the VS/WM unit. The binding
294 * table upload must be last.
295 */
296 &brw_vs_pull_constants,
297 &brw_vs_ubo_surfaces,
298 &brw_vs_abo_surfaces,
299 &brw_gs_pull_constants,
300 &brw_gs_ubo_surfaces,
301 &brw_gs_abo_surfaces,
302 &brw_wm_pull_constants,
303 &brw_wm_ubo_surfaces,
304 &brw_wm_abo_surfaces,
305 &gen6_renderbuffer_surfaces,
306 &brw_texture_surfaces,
307 &brw_vs_binding_table,
308 &brw_gs_binding_table,
309 &brw_wm_binding_table,
310
311 &brw_fs_samplers,
312 &brw_vs_samplers,
313 &brw_gs_samplers,
314 &gen8_multisample_state,
315
316 &gen8_disable_stages,
317 &gen8_vs_state,
318 &gen8_gs_state,
319 &gen8_sol_state,
320 &gen6_clip_state,
321 &gen8_raster_state,
322 &gen8_sbe_state,
323 &gen8_sf_state,
324 &gen8_ps_blend,
325 &gen8_ps_extra,
326 &gen8_ps_state,
327 &gen8_wm_depth_stencil,
328 &gen8_wm_state,
329
330 &gen6_scissor_state,
331
332 &gen7_depthbuffer,
333
334 &brw_polygon_stipple,
335 &brw_polygon_stipple_offset,
336
337 &brw_line_stipple,
338 &brw_aa_line_parameters,
339
340 &brw_drawing_rect,
341
342 &gen8_vf_topology,
343
344 &brw_indices,
345 &gen8_index_buffer,
346 &gen8_vertices,
347
348 &haswell_cut_index,
349 &gen8_pma_fix,
350 };
351
352 static const struct brw_tracked_state *gen8_compute_atoms[] =
353 {
354 &gen8_state_base_address,
355 &brw_cs_image_surfaces,
356 &gen7_cs_push_constants,
357 &brw_cs_pull_constants,
358 &brw_cs_ubo_surfaces,
359 &brw_cs_abo_surfaces,
360 &brw_texture_surfaces,
361 &brw_cs_work_groups_surface,
362 &brw_cs_state,
363 };
364
365 static void
366 brw_upload_initial_gpu_state(struct brw_context *brw)
367 {
368 /* On platforms with hardware contexts, we can set our initial GPU state
369 * right away rather than doing it via state atoms. This saves a small
370 * amount of overhead on every draw call.
371 */
372 if (!brw->hw_ctx)
373 return;
374
375 if (brw->gen == 6)
376 brw_emit_post_sync_nonzero_flush(brw);
377
378 brw_upload_invariant_state(brw);
379
380 /* Recommended optimization for Victim Cache eviction in pixel backend. */
381 if (brw->gen >= 9) {
382 BEGIN_BATCH(3);
383 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
384 OUT_BATCH(GEN7_CACHE_MODE_1);
385 OUT_BATCH((GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC << 16) |
386 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
387 ADVANCE_BATCH();
388 }
389
390 if (brw->gen >= 8) {
391 gen8_emit_3dstate_sample_pattern(brw);
392 }
393 }
394
395 static inline const struct brw_tracked_state *
396 brw_get_pipeline_atoms(struct brw_context *brw,
397 enum brw_pipeline pipeline)
398 {
399 switch (pipeline) {
400 case BRW_RENDER_PIPELINE:
401 return brw->render_atoms;
402 case BRW_COMPUTE_PIPELINE:
403 return brw->compute_atoms;
404 default:
405 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
406 unreachable("Unsupported pipeline");
407 return NULL;
408 }
409 }
410
411 static void
412 brw_copy_pipeline_atoms(struct brw_context *brw,
413 enum brw_pipeline pipeline,
414 const struct brw_tracked_state **atoms,
415 int num_atoms)
416 {
417 /* This is to work around brw_context::atoms being declared const. We want
418 * it to be const, but it needs to be initialized somehow!
419 */
420 struct brw_tracked_state *context_atoms =
421 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
422
423 for (int i = 0; i < num_atoms; i++) {
424 context_atoms[i] = *atoms[i];
425 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
426 assert(context_atoms[i].emit);
427 }
428
429 brw->num_atoms[pipeline] = num_atoms;
430 }
431
432 void brw_init_state( struct brw_context *brw )
433 {
434 struct gl_context *ctx = &brw->ctx;
435
436 /* Force the first brw_select_pipeline to emit pipeline select */
437 brw->last_pipeline = BRW_NUM_PIPELINES;
438
439 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
440 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
441 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
442 ARRAY_SIZE(brw->render_atoms));
443 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
444 ARRAY_SIZE(brw->render_atoms));
445 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
446 ARRAY_SIZE(brw->compute_atoms));
447 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
448 ARRAY_SIZE(brw->compute_atoms));
449
450 brw_init_caches(brw);
451
452 if (brw->gen >= 8) {
453 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
454 gen8_render_atoms,
455 ARRAY_SIZE(gen8_render_atoms));
456 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
457 gen8_compute_atoms,
458 ARRAY_SIZE(gen8_compute_atoms));
459 } else if (brw->gen == 7) {
460 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
461 gen7_render_atoms,
462 ARRAY_SIZE(gen7_render_atoms));
463 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
464 gen7_compute_atoms,
465 ARRAY_SIZE(gen7_compute_atoms));
466 } else if (brw->gen == 6) {
467 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
468 gen6_atoms, ARRAY_SIZE(gen6_atoms));
469 } else {
470 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
471 gen4_atoms, ARRAY_SIZE(gen4_atoms));
472 }
473
474 brw_upload_initial_gpu_state(brw);
475
476 brw->NewGLState = ~0;
477 brw->ctx.NewDriverState = ~0ull;
478
479 /* ~0 is a nonsensical value which won't match anything we program, so
480 * the programming will take effect on the first time around.
481 */
482 brw->pma_stall_bits = ~0;
483
484 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
485 * dirty flags.
486 */
487 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
488
489 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
490 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
491 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
492 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
493 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
494 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
495 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
496 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
497 }
498
499
/**
 * Tear down what brw_init_state() set up; currently that is only the
 * caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
504
505 /***********************************************************************
506 */
507
508 static bool
509 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
510 {
511 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
512 }
513
514 static void accumulate_state( struct brw_state_flags *a,
515 const struct brw_state_flags *b )
516 {
517 a->mesa |= b->mesa;
518 a->brw |= b->brw;
519 }
520
521
522 static void xor_states( struct brw_state_flags *result,
523 const struct brw_state_flags *a,
524 const struct brw_state_flags *b )
525 {
526 result->mesa = a->mesa ^ b->mesa;
527 result->brw = a->brw ^ b->brw;
528 }
529
/**
 * One entry of a dirty-bit statistics table.
 *
 * Tables of these are terminated by an entry whose \c bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;        /* flag value this entry tracks */
   const char *name;    /* printable name; always a string literal, hence const */
   uint32_t count;      /* how many times the flag was seen dirty */
};
535
/* Build a dirty_bit_map entry for a flag: its value, its spelled-out name
 * and a zeroed counter.  The parameter is deliberately not called "name"
 * so that it cannot be substituted into the .name designator.
 */
#define DEFINE_BIT(flag) { .bit = (flag), .name = #flag, .count = 0 }
537
538 static struct dirty_bit_map mesa_bits[] = {
539 DEFINE_BIT(_NEW_MODELVIEW),
540 DEFINE_BIT(_NEW_PROJECTION),
541 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
542 DEFINE_BIT(_NEW_COLOR),
543 DEFINE_BIT(_NEW_DEPTH),
544 DEFINE_BIT(_NEW_EVAL),
545 DEFINE_BIT(_NEW_FOG),
546 DEFINE_BIT(_NEW_HINT),
547 DEFINE_BIT(_NEW_LIGHT),
548 DEFINE_BIT(_NEW_LINE),
549 DEFINE_BIT(_NEW_PIXEL),
550 DEFINE_BIT(_NEW_POINT),
551 DEFINE_BIT(_NEW_POLYGON),
552 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
553 DEFINE_BIT(_NEW_SCISSOR),
554 DEFINE_BIT(_NEW_STENCIL),
555 DEFINE_BIT(_NEW_TEXTURE),
556 DEFINE_BIT(_NEW_TRANSFORM),
557 DEFINE_BIT(_NEW_VIEWPORT),
558 DEFINE_BIT(_NEW_ARRAY),
559 DEFINE_BIT(_NEW_RENDERMODE),
560 DEFINE_BIT(_NEW_BUFFERS),
561 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
562 DEFINE_BIT(_NEW_MULTISAMPLE),
563 DEFINE_BIT(_NEW_TRACK_MATRIX),
564 DEFINE_BIT(_NEW_PROGRAM),
565 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
566 DEFINE_BIT(_NEW_BUFFER_OBJECT),
567 DEFINE_BIT(_NEW_FRAG_CLAMP),
568 /* Avoid sign extension problems. */
569 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
570 {0, 0, 0}
571 };
572
573 static struct dirty_bit_map brw_bits[] = {
574 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
575 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
576 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
577 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
578 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
579 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
580 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
581 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
582 DEFINE_BIT(BRW_NEW_URB_FENCE),
583 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
584 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
585 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
586 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
587 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
588 DEFINE_BIT(BRW_NEW_PRIMITIVE),
589 DEFINE_BIT(BRW_NEW_CONTEXT),
590 DEFINE_BIT(BRW_NEW_PSP),
591 DEFINE_BIT(BRW_NEW_SURFACES),
592 DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
593 DEFINE_BIT(BRW_NEW_GS_BINDING_TABLE),
594 DEFINE_BIT(BRW_NEW_PS_BINDING_TABLE),
595 DEFINE_BIT(BRW_NEW_INDICES),
596 DEFINE_BIT(BRW_NEW_VERTICES),
597 DEFINE_BIT(BRW_NEW_BATCH),
598 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
599 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
600 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
601 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
602 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
603 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
604 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
605 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
606 DEFINE_BIT(BRW_NEW_STATS_WM),
607 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
608 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
609 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
610 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
611 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
612 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
613 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
614 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
615 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
616 DEFINE_BIT(BRW_NEW_CC_VP),
617 DEFINE_BIT(BRW_NEW_SF_VP),
618 DEFINE_BIT(BRW_NEW_CLIP_VP),
619 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
620 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
621 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
622 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
623 {0, 0, 0}
624 };
625
626 static void
627 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
628 {
629 for (int i = 0; bit_map[i].bit != 0; i++) {
630 if (bit_map[i].bit & bits)
631 bit_map[i].count++;
632 }
633 }
634
635 static void
636 brw_print_dirty_count(struct dirty_bit_map *bit_map)
637 {
638 for (int i = 0; bit_map[i].bit != 0; i++) {
639 if (bit_map[i].count > 1) {
640 fprintf(stderr, "0x%016lx: %12d (%s)\n",
641 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
642 }
643 }
644 }
645
646 static inline void
647 brw_upload_programs(struct brw_context *brw,
648 enum brw_pipeline pipeline)
649 {
650 if (pipeline == BRW_RENDER_PIPELINE) {
651 brw_upload_vs_prog(brw);
652
653 if (brw->gen < 6)
654 brw_upload_ff_gs_prog(brw);
655 else
656 brw_upload_gs_prog(brw);
657
658 /* Update the VUE map for data exiting the GS stage of the pipeline.
659 * This comes from the last enabled shader stage.
660 */
661 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
662 bool old_separate = brw->vue_map_geom_out.separate;
663 if (brw->geometry_program)
664 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
665 else
666 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
667
668 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
669 if (old_slots != brw->vue_map_geom_out.slots_valid ||
670 old_separate != brw->vue_map_geom_out.separate)
671 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
672
673 brw_upload_wm_prog(brw);
674 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
675 brw_upload_cs_prog(brw);
676 }
677 }
678
679 static inline void
680 merge_ctx_state(struct brw_context *brw,
681 struct brw_state_flags *state)
682 {
683 state->mesa |= brw->NewGLState;
684 state->brw |= brw->ctx.NewDriverState;
685 }
686
687 static inline void
688 check_and_emit_atom(struct brw_context *brw,
689 struct brw_state_flags *state,
690 const struct brw_tracked_state *atom)
691 {
692 if (check_state(state, &atom->dirty)) {
693 atom->emit(brw);
694 merge_ctx_state(brw, state);
695 }
696 }
697
698 static inline void
699 brw_upload_pipeline_state(struct brw_context *brw,
700 enum brw_pipeline pipeline)
701 {
702 struct gl_context *ctx = &brw->ctx;
703 int i;
704 static int dirty_count = 0;
705 struct brw_state_flags state = brw->state.pipelines[pipeline];
706 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
707
708 brw_select_pipeline(brw, pipeline);
709
710 if (0) {
711 /* Always re-emit all state. */
712 brw->NewGLState = ~0;
713 ctx->NewDriverState = ~0ull;
714 }
715
716 if (pipeline == BRW_RENDER_PIPELINE) {
717 if (brw->fragment_program != ctx->FragmentProgram._Current) {
718 brw->fragment_program = ctx->FragmentProgram._Current;
719 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
720 }
721
722 if (brw->geometry_program != ctx->GeometryProgram._Current) {
723 brw->geometry_program = ctx->GeometryProgram._Current;
724 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
725 }
726
727 if (brw->vertex_program != ctx->VertexProgram._Current) {
728 brw->vertex_program = ctx->VertexProgram._Current;
729 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
730 }
731 }
732
733 if (brw->compute_program != ctx->ComputeProgram._Current) {
734 brw->compute_program = ctx->ComputeProgram._Current;
735 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
736 }
737
738 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
739 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
740 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
741 }
742
743 if (brw->num_samples != fb_samples) {
744 brw->num_samples = fb_samples;
745 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
746 }
747
748 /* Exit early if no state is flagged as dirty */
749 merge_ctx_state(brw, &state);
750 if ((state.mesa | state.brw) == 0)
751 return;
752
753 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
754 if (brw->gen == 6)
755 brw_emit_post_sync_nonzero_flush(brw);
756
757 brw_upload_programs(brw, pipeline);
758 merge_ctx_state(brw, &state);
759
760 const struct brw_tracked_state *atoms =
761 brw_get_pipeline_atoms(brw, pipeline);
762 const int num_atoms = brw->num_atoms[pipeline];
763
764 if (unlikely(INTEL_DEBUG)) {
765 /* Debug version which enforces various sanity checks on the
766 * state flags which are generated and checked to help ensure
767 * state atoms are ordered correctly in the list.
768 */
769 struct brw_state_flags examined, prev;
770 memset(&examined, 0, sizeof(examined));
771 prev = state;
772
773 for (i = 0; i < num_atoms; i++) {
774 const struct brw_tracked_state *atom = &atoms[i];
775 struct brw_state_flags generated;
776
777 check_and_emit_atom(brw, &state, atom);
778
779 accumulate_state(&examined, &atom->dirty);
780
781 /* generated = (prev ^ state)
782 * if (examined & generated)
783 * fail;
784 */
785 xor_states(&generated, &prev, &state);
786 assert(!check_state(&examined, &generated));
787 prev = state;
788 }
789 }
790 else {
791 for (i = 0; i < num_atoms; i++) {
792 const struct brw_tracked_state *atom = &atoms[i];
793
794 check_and_emit_atom(brw, &state, atom);
795 }
796 }
797
798 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
799 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
800
801 brw_update_dirty_count(mesa_bits, state.mesa);
802 brw_update_dirty_count(brw_bits, state.brw);
803 if (dirty_count++ % 1000 == 0) {
804 brw_print_dirty_count(mesa_bits);
805 brw_print_dirty_count(brw_bits);
806 fprintf(stderr, "\n");
807 }
808 }
809 }
810
811 /***********************************************************************
812 * Emit all state:
813 */
814 void brw_upload_render_state(struct brw_context *brw)
815 {
816 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
817 }
818
819 static inline void
820 brw_pipeline_state_finished(struct brw_context *brw,
821 enum brw_pipeline pipeline)
822 {
823 /* Save all dirty state into the other pipelines */
824 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
825 if (i != pipeline) {
826 brw->state.pipelines[i].mesa |= brw->NewGLState;
827 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
828 } else {
829 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
830 }
831 }
832
833 brw->NewGLState = 0;
834 brw->ctx.NewDriverState = 0ull;
835 }
836
837 /**
838 * Clear dirty bits to account for the fact that the state emitted by
839 * brw_upload_render_state() has been committed to the hardware. This is a
840 * separate call from brw_upload_render_state() because it's possible that
841 * after the call to brw_upload_render_state(), we will discover that we've
842 * run out of aperture space, and need to rewind the batch buffer to the state
843 * it had before the brw_upload_render_state() call.
844 */
845 void
846 brw_render_state_finished(struct brw_context *brw)
847 {
848 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
849 }
850
851 void
852 brw_upload_compute_state(struct brw_context *brw)
853 {
854 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
855 }
856
857 void
858 brw_compute_state_finished(struct brw_context *brw)
859 {
860 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
861 }