i965/wm: use binding size for ubo/ssbo when automatic size is unset
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_l3_state,
190 &gen7_push_constant_space,
191 &gen7_urb,
192 &gen6_blend_state, /* must do before cc unit */
193 &gen6_color_calc_state, /* must do before cc unit */
194 &gen6_depth_stencil_state, /* must do before cc unit */
195
196 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
197
198 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
199 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
200 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
201 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
202 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
203
204 &gen6_vs_push_constants, /* Before vs_state */
205 &gen7_tcs_push_constants,
206 &gen7_tes_push_constants,
207 &gen6_gs_push_constants, /* Before gs_state */
208 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
209
210 /* Surface state setup. Must come before the VS/WM unit. The binding
211 * table upload must be last.
212 */
213 &brw_vs_pull_constants,
214 &brw_vs_ubo_surfaces,
215 &brw_vs_abo_surfaces,
216 &brw_tcs_pull_constants,
217 &brw_tcs_ubo_surfaces,
218 &brw_tcs_abo_surfaces,
219 &brw_tes_pull_constants,
220 &brw_tes_ubo_surfaces,
221 &brw_tes_abo_surfaces,
222 &brw_gs_pull_constants,
223 &brw_gs_ubo_surfaces,
224 &brw_gs_abo_surfaces,
225 &brw_wm_pull_constants,
226 &brw_wm_ubo_surfaces,
227 &brw_wm_abo_surfaces,
228 &gen6_renderbuffer_surfaces,
229 &brw_texture_surfaces,
230 &brw_vs_binding_table,
231 &brw_tcs_binding_table,
232 &brw_tes_binding_table,
233 &brw_gs_binding_table,
234 &brw_wm_binding_table,
235
236 &brw_fs_samplers,
237 &brw_vs_samplers,
238 &brw_tcs_samplers,
239 &brw_tes_samplers,
240 &brw_gs_samplers,
241 &gen6_multisample_state,
242
243 &gen7_vs_state,
244 &gen7_hs_state,
245 &gen7_te_state,
246 &gen7_ds_state,
247 &gen7_gs_state,
248 &gen7_sol_state,
249 &gen7_clip_state,
250 &gen7_sbe_state,
251 &gen7_sf_state,
252 &gen7_wm_state,
253 &gen7_ps_state,
254
255 &gen6_scissor_state,
256
257 &gen7_depthbuffer,
258
259 &brw_polygon_stipple,
260 &brw_polygon_stipple_offset,
261
262 &brw_line_stipple,
263 &brw_aa_line_parameters,
264
265 &brw_drawing_rect,
266
267 &brw_indices, /* must come before brw_vertices */
268 &brw_index_buffer,
269 &brw_vertices,
270
271 &haswell_cut_index,
272 };
273
274 static const struct brw_tracked_state *gen7_compute_atoms[] =
275 {
276 &brw_state_base_address,
277 &gen7_l3_state,
278 &brw_cs_image_surfaces,
279 &gen7_cs_push_constants,
280 &brw_cs_pull_constants,
281 &brw_cs_ubo_surfaces,
282 &brw_cs_abo_surfaces,
283 &brw_texture_surfaces,
284 &brw_cs_work_groups_surface,
285 &brw_cs_state,
286 };
287
288 static const struct brw_tracked_state *gen8_render_atoms[] =
289 {
290 /* Command packets: */
291 &gen8_state_base_address,
292
293 &brw_cc_vp,
294 &gen8_sf_clip_viewport,
295
296 &gen7_l3_state,
297 &gen7_push_constant_space,
298 &gen7_urb,
299 &gen8_blend_state,
300 &gen6_color_calc_state,
301
302 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
303
304 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
305 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
306 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
307 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
308 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
309
310 &gen6_vs_push_constants, /* Before vs_state */
311 &gen7_tcs_push_constants,
312 &gen7_tes_push_constants,
313 &gen6_gs_push_constants, /* Before gs_state */
314 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
315
316 /* Surface state setup. Must come before the VS/WM unit. The binding
317 * table upload must be last.
318 */
319 &brw_vs_pull_constants,
320 &brw_vs_ubo_surfaces,
321 &brw_vs_abo_surfaces,
322 &brw_tcs_pull_constants,
323 &brw_tcs_ubo_surfaces,
324 &brw_tcs_abo_surfaces,
325 &brw_tes_pull_constants,
326 &brw_tes_ubo_surfaces,
327 &brw_tes_abo_surfaces,
328 &brw_gs_pull_constants,
329 &brw_gs_ubo_surfaces,
330 &brw_gs_abo_surfaces,
331 &brw_wm_pull_constants,
332 &brw_wm_ubo_surfaces,
333 &brw_wm_abo_surfaces,
334 &gen6_renderbuffer_surfaces,
335 &brw_texture_surfaces,
336 &brw_vs_binding_table,
337 &brw_tcs_binding_table,
338 &brw_tes_binding_table,
339 &brw_gs_binding_table,
340 &brw_wm_binding_table,
341
342 &brw_fs_samplers,
343 &brw_vs_samplers,
344 &brw_tcs_samplers,
345 &brw_tes_samplers,
346 &brw_gs_samplers,
347 &gen8_multisample_state,
348
349 &gen8_disable_stages,
350 &gen8_vs_state,
351 &gen8_hs_state,
352 &gen7_te_state,
353 &gen8_ds_state,
354 &gen8_gs_state,
355 &gen8_sol_state,
356 &gen6_clip_state,
357 &gen8_raster_state,
358 &gen8_sbe_state,
359 &gen8_sf_state,
360 &gen8_ps_blend,
361 &gen8_ps_extra,
362 &gen8_ps_state,
363 &gen8_wm_depth_stencil,
364 &gen8_wm_state,
365
366 &gen6_scissor_state,
367
368 &gen7_depthbuffer,
369
370 &brw_polygon_stipple,
371 &brw_polygon_stipple_offset,
372
373 &brw_line_stipple,
374 &brw_aa_line_parameters,
375
376 &brw_drawing_rect,
377
378 &gen8_vf_topology,
379
380 &brw_indices,
381 &gen8_index_buffer,
382 &gen8_vertices,
383
384 &haswell_cut_index,
385 &gen8_pma_fix,
386 };
387
388 static const struct brw_tracked_state *gen8_compute_atoms[] =
389 {
390 &gen8_state_base_address,
391 &gen7_l3_state,
392 &brw_cs_image_surfaces,
393 &gen7_cs_push_constants,
394 &brw_cs_pull_constants,
395 &brw_cs_ubo_surfaces,
396 &brw_cs_abo_surfaces,
397 &brw_texture_surfaces,
398 &brw_cs_work_groups_surface,
399 &brw_cs_state,
400 };
401
402 static void
403 brw_upload_initial_gpu_state(struct brw_context *brw)
404 {
405 /* On platforms with hardware contexts, we can set our initial GPU state
406 * right away rather than doing it via state atoms. This saves a small
407 * amount of overhead on every draw call.
408 */
409 if (!brw->hw_ctx)
410 return;
411
412 if (brw->gen == 6)
413 brw_emit_post_sync_nonzero_flush(brw);
414
415 brw_upload_invariant_state(brw);
416
417 /* Recommended optimization for Victim Cache eviction in pixel backend. */
418 if (brw->gen >= 9) {
419 BEGIN_BATCH(3);
420 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
421 OUT_BATCH(GEN7_CACHE_MODE_1);
422 OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
423 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
424 ADVANCE_BATCH();
425 }
426
427 if (brw->gen >= 8) {
428 gen8_emit_3dstate_sample_pattern(brw);
429 }
430 }
431
432 static inline const struct brw_tracked_state *
433 brw_get_pipeline_atoms(struct brw_context *brw,
434 enum brw_pipeline pipeline)
435 {
436 switch (pipeline) {
437 case BRW_RENDER_PIPELINE:
438 return brw->render_atoms;
439 case BRW_COMPUTE_PIPELINE:
440 return brw->compute_atoms;
441 default:
442 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
443 unreachable("Unsupported pipeline");
444 return NULL;
445 }
446 }
447
448 static void
449 brw_copy_pipeline_atoms(struct brw_context *brw,
450 enum brw_pipeline pipeline,
451 const struct brw_tracked_state **atoms,
452 int num_atoms)
453 {
454 /* This is to work around brw_context::atoms being declared const. We want
455 * it to be const, but it needs to be initialized somehow!
456 */
457 struct brw_tracked_state *context_atoms =
458 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
459
460 for (int i = 0; i < num_atoms; i++) {
461 context_atoms[i] = *atoms[i];
462 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
463 assert(context_atoms[i].emit);
464 }
465
466 brw->num_atoms[pipeline] = num_atoms;
467 }
468
469 void brw_init_state( struct brw_context *brw )
470 {
471 struct gl_context *ctx = &brw->ctx;
472
473 /* Force the first brw_select_pipeline to emit pipeline select */
474 brw->last_pipeline = BRW_NUM_PIPELINES;
475
476 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
477 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
478 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
479 ARRAY_SIZE(brw->render_atoms));
480 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
481 ARRAY_SIZE(brw->render_atoms));
482 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
483 ARRAY_SIZE(brw->compute_atoms));
484 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
485 ARRAY_SIZE(brw->compute_atoms));
486
487 brw_init_caches(brw);
488
489 if (brw->gen >= 8) {
490 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
491 gen8_render_atoms,
492 ARRAY_SIZE(gen8_render_atoms));
493 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
494 gen8_compute_atoms,
495 ARRAY_SIZE(gen8_compute_atoms));
496 } else if (brw->gen == 7) {
497 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
498 gen7_render_atoms,
499 ARRAY_SIZE(gen7_render_atoms));
500 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
501 gen7_compute_atoms,
502 ARRAY_SIZE(gen7_compute_atoms));
503 } else if (brw->gen == 6) {
504 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
505 gen6_atoms, ARRAY_SIZE(gen6_atoms));
506 } else {
507 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
508 gen4_atoms, ARRAY_SIZE(gen4_atoms));
509 }
510
511 brw_upload_initial_gpu_state(brw);
512
513 brw->NewGLState = ~0;
514 brw->ctx.NewDriverState = ~0ull;
515
516 /* ~0 is a nonsensical value which won't match anything we program, so
517 * the programming will take effect on the first time around.
518 */
519 brw->pma_stall_bits = ~0;
520
521 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
522 * dirty flags.
523 */
524 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
525
526 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
527 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
528 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
529 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
530 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
531 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
532 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
533 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
534 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
535 }
536
537
/**
 * Tear down the state-tracking resources of \p brw; this currently means
 * destroying the caches created by brw_init_caches().
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
542
543 /***********************************************************************
544 */
545
546 static bool
547 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
548 {
549 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
550 }
551
552 static void accumulate_state( struct brw_state_flags *a,
553 const struct brw_state_flags *b )
554 {
555 a->mesa |= b->mesa;
556 a->brw |= b->brw;
557 }
558
559
560 static void xor_states( struct brw_state_flags *result,
561 const struct brw_state_flags *a,
562 const struct brw_state_flags *b )
563 {
564 result->mesa = a->mesa ^ b->mesa;
565 result->brw = a->brw ^ b->brw;
566 }
567
/**
 * One row of the dirty-bit statistics tables.
 *
 * Tables of these entries end with an entry whose bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;       /* Flag value this entry counts. */
   const char *name;   /* Flag name; only ever points at string literals. */
   uint32_t count;     /* Number of uploads that had this flag set. */
};
573
/* Build a dirty_bit_map entry for a flag: its value, its spelling as a
 * string, and a zeroed counter.
 */
#define DEFINE_BIT(flag) { .bit = (flag), .name = #flag, .count = 0 }
575
576 static struct dirty_bit_map mesa_bits[] = {
577 DEFINE_BIT(_NEW_MODELVIEW),
578 DEFINE_BIT(_NEW_PROJECTION),
579 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
580 DEFINE_BIT(_NEW_COLOR),
581 DEFINE_BIT(_NEW_DEPTH),
582 DEFINE_BIT(_NEW_EVAL),
583 DEFINE_BIT(_NEW_FOG),
584 DEFINE_BIT(_NEW_HINT),
585 DEFINE_BIT(_NEW_LIGHT),
586 DEFINE_BIT(_NEW_LINE),
587 DEFINE_BIT(_NEW_PIXEL),
588 DEFINE_BIT(_NEW_POINT),
589 DEFINE_BIT(_NEW_POLYGON),
590 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
591 DEFINE_BIT(_NEW_SCISSOR),
592 DEFINE_BIT(_NEW_STENCIL),
593 DEFINE_BIT(_NEW_TEXTURE),
594 DEFINE_BIT(_NEW_TRANSFORM),
595 DEFINE_BIT(_NEW_VIEWPORT),
596 DEFINE_BIT(_NEW_ARRAY),
597 DEFINE_BIT(_NEW_RENDERMODE),
598 DEFINE_BIT(_NEW_BUFFERS),
599 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
600 DEFINE_BIT(_NEW_MULTISAMPLE),
601 DEFINE_BIT(_NEW_TRACK_MATRIX),
602 DEFINE_BIT(_NEW_PROGRAM),
603 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
604 DEFINE_BIT(_NEW_BUFFER_OBJECT),
605 DEFINE_BIT(_NEW_FRAG_CLAMP),
606 /* Avoid sign extension problems. */
607 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
608 {0, 0, 0}
609 };
610
611 static struct dirty_bit_map brw_bits[] = {
612 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
613 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
614 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
615 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
616 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
617 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
618 DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
619 DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
620 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
621 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
622 DEFINE_BIT(BRW_NEW_URB_FENCE),
623 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
624 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
625 DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
626 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
627 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
628 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
629 DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
630 DEFINE_BIT(BRW_NEW_PRIMITIVE),
631 DEFINE_BIT(BRW_NEW_CONTEXT),
632 DEFINE_BIT(BRW_NEW_PSP),
633 DEFINE_BIT(BRW_NEW_SURFACES),
634 DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
635 DEFINE_BIT(BRW_NEW_INDICES),
636 DEFINE_BIT(BRW_NEW_VERTICES),
637 DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
638 DEFINE_BIT(BRW_NEW_BATCH),
639 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
640 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
641 DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
642 DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
643 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
644 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
645 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
646 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
647 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
648 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
649 DEFINE_BIT(BRW_NEW_STATS_WM),
650 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
651 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
652 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
653 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
654 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
655 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
656 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
657 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
658 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
659 DEFINE_BIT(BRW_NEW_CC_VP),
660 DEFINE_BIT(BRW_NEW_SF_VP),
661 DEFINE_BIT(BRW_NEW_CLIP_VP),
662 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
663 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
664 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
665 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
666 DEFINE_BIT(BRW_NEW_URB_SIZE),
667 {0, 0, 0}
668 };
669
670 static void
671 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
672 {
673 for (int i = 0; bit_map[i].bit != 0; i++) {
674 if (bit_map[i].bit & bits)
675 bit_map[i].count++;
676 }
677 }
678
679 static void
680 brw_print_dirty_count(struct dirty_bit_map *bit_map)
681 {
682 for (int i = 0; bit_map[i].bit != 0; i++) {
683 if (bit_map[i].count > 1) {
684 fprintf(stderr, "0x%016lx: %12d (%s)\n",
685 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
686 }
687 }
688 }
689
690 static inline void
691 brw_upload_tess_programs(struct brw_context *brw)
692 {
693 if (brw->tess_eval_program) {
694 uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
695 uint32_t per_patch_slots =
696 brw->tess_eval_program->Base.PatchInputsRead;
697
698 /* The TCS may have additional outputs which aren't read by the
699 * TES (possibly for cross-thread communication). These need to
700 * be stored in the Patch URB Entry as well.
701 */
702 if (brw->tess_ctrl_program) {
703 per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
704 per_patch_slots |=
705 brw->tess_ctrl_program->Base.PatchOutputsWritten;
706 }
707
708 brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
709 brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
710 } else {
711 brw->tcs.prog_data = NULL;
712 brw->tcs.base.prog_data = NULL;
713 brw->tes.prog_data = NULL;
714 brw->tes.base.prog_data = NULL;
715 }
716 }
717
718 static inline void
719 brw_upload_programs(struct brw_context *brw,
720 enum brw_pipeline pipeline)
721 {
722 if (pipeline == BRW_RENDER_PIPELINE) {
723 brw_upload_vs_prog(brw);
724 brw_upload_tess_programs(brw);
725
726 if (brw->gen < 6)
727 brw_upload_ff_gs_prog(brw);
728 else
729 brw_upload_gs_prog(brw);
730
731 /* Update the VUE map for data exiting the GS stage of the pipeline.
732 * This comes from the last enabled shader stage.
733 */
734 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
735 bool old_separate = brw->vue_map_geom_out.separate;
736 if (brw->geometry_program)
737 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
738 else if (brw->tess_eval_program)
739 brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
740 else
741 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
742
743 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
744 if (old_slots != brw->vue_map_geom_out.slots_valid ||
745 old_separate != brw->vue_map_geom_out.separate)
746 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
747
748 brw_upload_wm_prog(brw);
749 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
750 brw_upload_cs_prog(brw);
751 }
752 }
753
754 static inline void
755 merge_ctx_state(struct brw_context *brw,
756 struct brw_state_flags *state)
757 {
758 state->mesa |= brw->NewGLState;
759 state->brw |= brw->ctx.NewDriverState;
760 }
761
762 static inline void
763 check_and_emit_atom(struct brw_context *brw,
764 struct brw_state_flags *state,
765 const struct brw_tracked_state *atom)
766 {
767 if (check_state(state, &atom->dirty)) {
768 atom->emit(brw);
769 merge_ctx_state(brw, state);
770 }
771 }
772
773 static inline void
774 brw_upload_pipeline_state(struct brw_context *brw,
775 enum brw_pipeline pipeline)
776 {
777 struct gl_context *ctx = &brw->ctx;
778 int i;
779 static int dirty_count = 0;
780 struct brw_state_flags state = brw->state.pipelines[pipeline];
781 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
782
783 brw_select_pipeline(brw, pipeline);
784
785 if (0) {
786 /* Always re-emit all state. */
787 brw->NewGLState = ~0;
788 ctx->NewDriverState = ~0ull;
789 }
790
791 if (pipeline == BRW_RENDER_PIPELINE) {
792 if (brw->fragment_program != ctx->FragmentProgram._Current) {
793 brw->fragment_program = ctx->FragmentProgram._Current;
794 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
795 }
796
797 if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
798 brw->tess_eval_program = ctx->TessEvalProgram._Current;
799 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
800 }
801
802 if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
803 brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
804 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
805 }
806
807 if (brw->geometry_program != ctx->GeometryProgram._Current) {
808 brw->geometry_program = ctx->GeometryProgram._Current;
809 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
810 }
811
812 if (brw->vertex_program != ctx->VertexProgram._Current) {
813 brw->vertex_program = ctx->VertexProgram._Current;
814 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
815 }
816 }
817
818 if (brw->compute_program != ctx->ComputeProgram._Current) {
819 brw->compute_program = ctx->ComputeProgram._Current;
820 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
821 }
822
823 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
824 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
825 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
826 }
827
828 if (brw->num_samples != fb_samples) {
829 brw->num_samples = fb_samples;
830 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
831 }
832
833 /* Exit early if no state is flagged as dirty */
834 merge_ctx_state(brw, &state);
835 if ((state.mesa | state.brw) == 0)
836 return;
837
838 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
839 if (brw->gen == 6)
840 brw_emit_post_sync_nonzero_flush(brw);
841
842 brw_upload_programs(brw, pipeline);
843 merge_ctx_state(brw, &state);
844
845 const struct brw_tracked_state *atoms =
846 brw_get_pipeline_atoms(brw, pipeline);
847 const int num_atoms = brw->num_atoms[pipeline];
848
849 if (unlikely(INTEL_DEBUG)) {
850 /* Debug version which enforces various sanity checks on the
851 * state flags which are generated and checked to help ensure
852 * state atoms are ordered correctly in the list.
853 */
854 struct brw_state_flags examined, prev;
855 memset(&examined, 0, sizeof(examined));
856 prev = state;
857
858 for (i = 0; i < num_atoms; i++) {
859 const struct brw_tracked_state *atom = &atoms[i];
860 struct brw_state_flags generated;
861
862 check_and_emit_atom(brw, &state, atom);
863
864 accumulate_state(&examined, &atom->dirty);
865
866 /* generated = (prev ^ state)
867 * if (examined & generated)
868 * fail;
869 */
870 xor_states(&generated, &prev, &state);
871 assert(!check_state(&examined, &generated));
872 prev = state;
873 }
874 }
875 else {
876 for (i = 0; i < num_atoms; i++) {
877 const struct brw_tracked_state *atom = &atoms[i];
878
879 check_and_emit_atom(brw, &state, atom);
880 }
881 }
882
883 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
884 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
885
886 brw_update_dirty_count(mesa_bits, state.mesa);
887 brw_update_dirty_count(brw_bits, state.brw);
888 if (dirty_count++ % 1000 == 0) {
889 brw_print_dirty_count(mesa_bits);
890 brw_print_dirty_count(brw_bits);
891 fprintf(stderr, "\n");
892 }
893 }
894 }
895
896 /***********************************************************************
897 * Emit all state:
898 */
899 void brw_upload_render_state(struct brw_context *brw)
900 {
901 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
902 }
903
904 static inline void
905 brw_pipeline_state_finished(struct brw_context *brw,
906 enum brw_pipeline pipeline)
907 {
908 /* Save all dirty state into the other pipelines */
909 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
910 if (i != pipeline) {
911 brw->state.pipelines[i].mesa |= brw->NewGLState;
912 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
913 } else {
914 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
915 }
916 }
917
918 brw->NewGLState = 0;
919 brw->ctx.NewDriverState = 0ull;
920 }
921
922 /**
923 * Clear dirty bits to account for the fact that the state emitted by
924 * brw_upload_render_state() has been committed to the hardware. This is a
925 * separate call from brw_upload_render_state() because it's possible that
926 * after the call to brw_upload_render_state(), we will discover that we've
927 * run out of aperture space, and need to rewind the batch buffer to the state
928 * it had before the brw_upload_render_state() call.
929 */
930 void
931 brw_render_state_finished(struct brw_context *brw)
932 {
933 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
934 }
935
936 void
937 brw_upload_compute_state(struct brw_context *brw)
938 {
939 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
940 }
941
942 void
943 brw_compute_state_finished(struct brw_context *brw)
944 {
945 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
946 }