i965: Reduce cross-pollination between the DRI driver and compiler
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "brw_program.h"
37 #include "drivers/common/meta.h"
38 #include "intel_batchbuffer.h"
39 #include "intel_buffers.h"
40 #include "brw_vs.h"
41 #include "brw_ff_gs.h"
42 #include "brw_gs.h"
43 #include "brw_wm.h"
44 #include "brw_cs.h"
45 #include "main/framebuffer.h"
46
47 static const struct brw_tracked_state *gen4_atoms[] =
48 {
49 /* Once all the programs are done, we know how large urb entry
50 * sizes need to be and can decide if we need to change the urb
51 * layout.
52 */
53 &brw_curbe_offsets,
54 &brw_recalculate_urb_fence,
55
56 &brw_cc_vp,
57 &brw_cc_unit,
58
59 /* Surface state setup. Must come before the VS/WM unit. The binding
60 * table upload must be last.
61 */
62 &brw_vs_pull_constants,
63 &brw_wm_pull_constants,
64 &brw_renderbuffer_surfaces,
65 &brw_renderbuffer_read_surfaces,
66 &brw_texture_surfaces,
67 &brw_vs_binding_table,
68 &brw_wm_binding_table,
69
70 &brw_fs_samplers,
71 &brw_vs_samplers,
72
73 /* These set up state for brw_psp_urb_cbs */
74 &brw_wm_unit,
75 &brw_sf_vp,
76 &brw_sf_unit,
77 &brw_vs_unit, /* always required, enabled or not */
78 &brw_clip_unit,
79 &brw_gs_unit,
80
81 /* Command packets:
82 */
83 &brw_invariant_state,
84
85 &brw_binding_table_pointers,
86 &brw_blend_constant_color,
87
88 &brw_depthbuffer,
89
90 &brw_polygon_stipple,
91 &brw_polygon_stipple_offset,
92
93 &brw_line_stipple,
94
95 &brw_psp_urb_cbs,
96
97 &brw_drawing_rect,
98 &brw_indices, /* must come before brw_vertices */
99 &brw_index_buffer,
100 &brw_vertices,
101
102 &brw_constant_buffer
103 };
104
105 static const struct brw_tracked_state *gen6_atoms[] =
106 {
107 &gen6_sf_and_clip_viewports,
108
109 /* Command packets: */
110
111 &brw_cc_vp,
112 &gen6_viewport_state, /* must do after *_vp stages */
113
114 &gen6_urb,
115 &gen6_blend_state, /* must do before cc unit */
116 &gen6_color_calc_state, /* must do before cc unit */
117 &gen6_depth_stencil_state, /* must do before cc unit */
118
119 &gen6_vs_push_constants, /* Before vs_state */
120 &gen6_gs_push_constants, /* Before gs_state */
121 &gen6_wm_push_constants, /* Before wm_state */
122
123 /* Surface state setup. Must come before the VS/WM unit. The binding
124 * table upload must be last.
125 */
126 &brw_vs_pull_constants,
127 &brw_vs_ubo_surfaces,
128 &brw_gs_pull_constants,
129 &brw_gs_ubo_surfaces,
130 &brw_wm_pull_constants,
131 &brw_wm_ubo_surfaces,
132 &gen6_renderbuffer_surfaces,
133 &brw_renderbuffer_read_surfaces,
134 &brw_texture_surfaces,
135 &gen6_sol_surface,
136 &brw_vs_binding_table,
137 &gen6_gs_binding_table,
138 &brw_wm_binding_table,
139
140 &brw_fs_samplers,
141 &brw_vs_samplers,
142 &brw_gs_samplers,
143 &gen6_sampler_state,
144 &gen6_multisample_state,
145
146 &gen6_vs_state,
147 &gen6_gs_state,
148 &gen6_clip_state,
149 &gen6_sf_state,
150 &gen6_wm_state,
151
152 &gen6_scissor_state,
153
154 &gen6_binding_table_pointers,
155
156 &brw_depthbuffer,
157
158 &brw_polygon_stipple,
159 &brw_polygon_stipple_offset,
160
161 &brw_line_stipple,
162
163 &brw_drawing_rect,
164
165 &brw_indices, /* must come before brw_vertices */
166 &brw_index_buffer,
167 &brw_vertices,
168 };
169
170 static const struct brw_tracked_state *gen7_render_atoms[] =
171 {
172 /* Command packets: */
173
174 &brw_cc_vp,
175 &gen7_sf_clip_viewport,
176
177 &gen7_l3_state,
178 &gen7_push_constant_space,
179 &gen7_urb,
180 &gen6_blend_state, /* must do before cc unit */
181 &gen6_color_calc_state, /* must do before cc unit */
182 &gen6_depth_stencil_state, /* must do before cc unit */
183
184 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
185
186 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
187 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
188 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
189 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
190 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
191
192 &gen6_vs_push_constants, /* Before vs_state */
193 &gen7_tcs_push_constants,
194 &gen7_tes_push_constants,
195 &gen6_gs_push_constants, /* Before gs_state */
196 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
197
198 /* Surface state setup. Must come before the VS/WM unit. The binding
199 * table upload must be last.
200 */
201 &brw_vs_pull_constants,
202 &brw_vs_ubo_surfaces,
203 &brw_vs_abo_surfaces,
204 &brw_tcs_pull_constants,
205 &brw_tcs_ubo_surfaces,
206 &brw_tcs_abo_surfaces,
207 &brw_tes_pull_constants,
208 &brw_tes_ubo_surfaces,
209 &brw_tes_abo_surfaces,
210 &brw_gs_pull_constants,
211 &brw_gs_ubo_surfaces,
212 &brw_gs_abo_surfaces,
213 &brw_wm_pull_constants,
214 &brw_wm_ubo_surfaces,
215 &brw_wm_abo_surfaces,
216 &gen6_renderbuffer_surfaces,
217 &brw_renderbuffer_read_surfaces,
218 &brw_texture_surfaces,
219 &brw_vs_binding_table,
220 &brw_tcs_binding_table,
221 &brw_tes_binding_table,
222 &brw_gs_binding_table,
223 &brw_wm_binding_table,
224
225 &brw_fs_samplers,
226 &brw_vs_samplers,
227 &brw_tcs_samplers,
228 &brw_tes_samplers,
229 &brw_gs_samplers,
230 &gen6_multisample_state,
231
232 &gen7_vs_state,
233 &gen7_hs_state,
234 &gen7_te_state,
235 &gen7_ds_state,
236 &gen7_gs_state,
237 &gen7_sol_state,
238 &gen6_clip_state,
239 &gen7_sbe_state,
240 &gen7_sf_state,
241 &gen7_wm_state,
242 &gen7_ps_state,
243
244 &gen6_scissor_state,
245
246 &gen7_depthbuffer,
247
248 &brw_polygon_stipple,
249 &brw_polygon_stipple_offset,
250
251 &brw_line_stipple,
252
253 &brw_drawing_rect,
254
255 &brw_indices, /* must come before brw_vertices */
256 &brw_index_buffer,
257 &brw_vertices,
258
259 &haswell_cut_index,
260 };
261
262 static const struct brw_tracked_state *gen7_compute_atoms[] =
263 {
264 &gen7_l3_state,
265 &brw_cs_image_surfaces,
266 &gen7_cs_push_constants,
267 &brw_cs_pull_constants,
268 &brw_cs_ubo_surfaces,
269 &brw_cs_abo_surfaces,
270 &brw_cs_texture_surfaces,
271 &brw_cs_work_groups_surface,
272 &brw_cs_samplers,
273 &brw_cs_state,
274 };
275
276 static const struct brw_tracked_state *gen8_render_atoms[] =
277 {
278 &brw_cc_vp,
279 &gen8_sf_clip_viewport,
280
281 &gen7_l3_state,
282 &gen7_push_constant_space,
283 &gen7_urb,
284 &gen8_blend_state,
285 &gen6_color_calc_state,
286
287 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
288
289 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
290 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
291 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
292 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
293 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
294
295 &gen6_vs_push_constants, /* Before vs_state */
296 &gen7_tcs_push_constants,
297 &gen7_tes_push_constants,
298 &gen6_gs_push_constants, /* Before gs_state */
299 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
300
301 /* Surface state setup. Must come before the VS/WM unit. The binding
302 * table upload must be last.
303 */
304 &brw_vs_pull_constants,
305 &brw_vs_ubo_surfaces,
306 &brw_vs_abo_surfaces,
307 &brw_tcs_pull_constants,
308 &brw_tcs_ubo_surfaces,
309 &brw_tcs_abo_surfaces,
310 &brw_tes_pull_constants,
311 &brw_tes_ubo_surfaces,
312 &brw_tes_abo_surfaces,
313 &brw_gs_pull_constants,
314 &brw_gs_ubo_surfaces,
315 &brw_gs_abo_surfaces,
316 &brw_wm_pull_constants,
317 &brw_wm_ubo_surfaces,
318 &brw_wm_abo_surfaces,
319 &gen6_renderbuffer_surfaces,
320 &brw_renderbuffer_read_surfaces,
321 &brw_texture_surfaces,
322 &brw_vs_binding_table,
323 &brw_tcs_binding_table,
324 &brw_tes_binding_table,
325 &brw_gs_binding_table,
326 &brw_wm_binding_table,
327
328 &brw_fs_samplers,
329 &brw_vs_samplers,
330 &brw_tcs_samplers,
331 &brw_tes_samplers,
332 &brw_gs_samplers,
333 &gen8_multisample_state,
334
335 &gen8_vs_state,
336 &gen8_hs_state,
337 &gen7_te_state,
338 &gen8_ds_state,
339 &gen8_gs_state,
340 &gen7_sol_state,
341 &gen6_clip_state,
342 &gen8_raster_state,
343 &gen8_sbe_state,
344 &gen8_sf_state,
345 &gen8_ps_blend,
346 &gen8_ps_extra,
347 &gen8_ps_state,
348 &gen8_wm_depth_stencil,
349 &gen8_wm_state,
350
351 &gen6_scissor_state,
352
353 &gen7_depthbuffer,
354
355 &brw_polygon_stipple,
356 &brw_polygon_stipple_offset,
357
358 &brw_line_stipple,
359
360 &brw_drawing_rect,
361
362 &gen8_vf_topology,
363
364 &brw_indices,
365 &gen8_index_buffer,
366 &gen8_vertices,
367
368 &haswell_cut_index,
369 &gen8_pma_fix,
370 };
371
372 static const struct brw_tracked_state *gen8_compute_atoms[] =
373 {
374 &gen7_l3_state,
375 &brw_cs_image_surfaces,
376 &gen7_cs_push_constants,
377 &brw_cs_pull_constants,
378 &brw_cs_ubo_surfaces,
379 &brw_cs_abo_surfaces,
380 &brw_cs_texture_surfaces,
381 &brw_cs_work_groups_surface,
382 &brw_cs_samplers,
383 &brw_cs_state,
384 };
385
386 static void
387 brw_upload_initial_gpu_state(struct brw_context *brw)
388 {
389 /* On platforms with hardware contexts, we can set our initial GPU state
390 * right away rather than doing it via state atoms. This saves a small
391 * amount of overhead on every draw call.
392 */
393 if (!brw->hw_ctx)
394 return;
395
396 if (brw->gen == 6)
397 brw_emit_post_sync_nonzero_flush(brw);
398
399 brw_upload_invariant_state(brw);
400
401 /* Recommended optimization for Victim Cache eviction in pixel backend. */
402 if (brw->gen >= 9) {
403 BEGIN_BATCH(3);
404 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
405 OUT_BATCH(GEN7_CACHE_MODE_1);
406 OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
407 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
408 ADVANCE_BATCH();
409 }
410
411 if (brw->gen >= 8) {
412 gen8_emit_3dstate_sample_pattern(brw);
413
414 BEGIN_BATCH(5);
415 OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
416 OUT_BATCH(0);
417 OUT_BATCH(0);
418 OUT_BATCH(0);
419 OUT_BATCH(0);
420 ADVANCE_BATCH();
421
422 BEGIN_BATCH(2);
423 OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
424 OUT_BATCH(0);
425 ADVANCE_BATCH();
426 }
427 }
428
429 static inline const struct brw_tracked_state *
430 brw_get_pipeline_atoms(struct brw_context *brw,
431 enum brw_pipeline pipeline)
432 {
433 switch (pipeline) {
434 case BRW_RENDER_PIPELINE:
435 return brw->render_atoms;
436 case BRW_COMPUTE_PIPELINE:
437 return brw->compute_atoms;
438 default:
439 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
440 unreachable("Unsupported pipeline");
441 return NULL;
442 }
443 }
444
445 static void
446 brw_copy_pipeline_atoms(struct brw_context *brw,
447 enum brw_pipeline pipeline,
448 const struct brw_tracked_state **atoms,
449 int num_atoms)
450 {
451 /* This is to work around brw_context::atoms being declared const. We want
452 * it to be const, but it needs to be initialized somehow!
453 */
454 struct brw_tracked_state *context_atoms =
455 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
456
457 for (int i = 0; i < num_atoms; i++) {
458 context_atoms[i] = *atoms[i];
459 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
460 assert(context_atoms[i].emit);
461 }
462
463 brw->num_atoms[pipeline] = num_atoms;
464 }
465
466 void brw_init_state( struct brw_context *brw )
467 {
468 struct gl_context *ctx = &brw->ctx;
469
470 /* Force the first brw_select_pipeline to emit pipeline select */
471 brw->last_pipeline = BRW_NUM_PIPELINES;
472
473 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
474 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
475 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
476 ARRAY_SIZE(brw->render_atoms));
477 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
478 ARRAY_SIZE(brw->render_atoms));
479 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
480 ARRAY_SIZE(brw->compute_atoms));
481 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
482 ARRAY_SIZE(brw->compute_atoms));
483
484 brw_init_caches(brw);
485
486 if (brw->gen >= 8) {
487 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
488 gen8_render_atoms,
489 ARRAY_SIZE(gen8_render_atoms));
490 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
491 gen8_compute_atoms,
492 ARRAY_SIZE(gen8_compute_atoms));
493 } else if (brw->gen == 7) {
494 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
495 gen7_render_atoms,
496 ARRAY_SIZE(gen7_render_atoms));
497 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
498 gen7_compute_atoms,
499 ARRAY_SIZE(gen7_compute_atoms));
500 } else if (brw->gen == 6) {
501 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
502 gen6_atoms, ARRAY_SIZE(gen6_atoms));
503 } else {
504 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
505 gen4_atoms, ARRAY_SIZE(gen4_atoms));
506 }
507
508 brw_upload_initial_gpu_state(brw);
509
510 brw->NewGLState = ~0;
511 brw->ctx.NewDriverState = ~0ull;
512
513 /* ~0 is a nonsensical value which won't match anything we program, so
514 * the programming will take effect on the first time around.
515 */
516 brw->pma_stall_bits = ~0;
517
518 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
519 * dirty flags.
520 */
521 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
522
523 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
524 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
525 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
526 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
527 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
528 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
529 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
530 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
531 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
532 ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
533 }
534
535
/* Tear-down counterpart of brw_init_state(); all it does is destroy the
 * caches.
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
540
541 /***********************************************************************
542 */
543
544 static bool
545 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
546 {
547 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
548 }
549
550 static void accumulate_state( struct brw_state_flags *a,
551 const struct brw_state_flags *b )
552 {
553 a->mesa |= b->mesa;
554 a->brw |= b->brw;
555 }
556
557
558 static void xor_states( struct brw_state_flags *result,
559 const struct brw_state_flags *a,
560 const struct brw_state_flags *b )
561 {
562 result->mesa = a->mesa ^ b->mesa;
563 result->brw = a->brw ^ b->brw;
564 }
565
/* One row of a dirty-bit statistics table. */
struct dirty_bit_map {
   uint64_t bit;       /* flag value; 0 marks the end of a table */
   const char *name;   /* flag name; const because the tables fill it
                        * from string literals */
   uint32_t count;     /* incremented by brw_update_dirty_count() each
                        * time the flag is seen set */
};
571
/* Build a struct dirty_bit_map initializer from a flag macro: the flag's
 * value, its spelling as a string, and a zero count.
 */
#define DEFINE_BIT(flag) { flag, #flag, 0 }
573
574 static struct dirty_bit_map mesa_bits[] = {
575 DEFINE_BIT(_NEW_MODELVIEW),
576 DEFINE_BIT(_NEW_PROJECTION),
577 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
578 DEFINE_BIT(_NEW_COLOR),
579 DEFINE_BIT(_NEW_DEPTH),
580 DEFINE_BIT(_NEW_EVAL),
581 DEFINE_BIT(_NEW_FOG),
582 DEFINE_BIT(_NEW_HINT),
583 DEFINE_BIT(_NEW_LIGHT),
584 DEFINE_BIT(_NEW_LINE),
585 DEFINE_BIT(_NEW_PIXEL),
586 DEFINE_BIT(_NEW_POINT),
587 DEFINE_BIT(_NEW_POLYGON),
588 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
589 DEFINE_BIT(_NEW_SCISSOR),
590 DEFINE_BIT(_NEW_STENCIL),
591 DEFINE_BIT(_NEW_TEXTURE),
592 DEFINE_BIT(_NEW_TRANSFORM),
593 DEFINE_BIT(_NEW_VIEWPORT),
594 DEFINE_BIT(_NEW_ARRAY),
595 DEFINE_BIT(_NEW_RENDERMODE),
596 DEFINE_BIT(_NEW_BUFFERS),
597 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
598 DEFINE_BIT(_NEW_MULTISAMPLE),
599 DEFINE_BIT(_NEW_TRACK_MATRIX),
600 DEFINE_BIT(_NEW_PROGRAM),
601 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
602 DEFINE_BIT(_NEW_BUFFER_OBJECT),
603 DEFINE_BIT(_NEW_FRAG_CLAMP),
604 /* Avoid sign extension problems. */
605 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
606 {0, 0, 0}
607 };
608
609 static struct dirty_bit_map brw_bits[] = {
610 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
611 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
612 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
613 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
614 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
615 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
616 DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
617 DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
618 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
619 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
620 DEFINE_BIT(BRW_NEW_URB_FENCE),
621 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
622 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
623 DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
624 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
625 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
626 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
627 DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
628 DEFINE_BIT(BRW_NEW_PRIMITIVE),
629 DEFINE_BIT(BRW_NEW_CONTEXT),
630 DEFINE_BIT(BRW_NEW_PSP),
631 DEFINE_BIT(BRW_NEW_SURFACES),
632 DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
633 DEFINE_BIT(BRW_NEW_INDICES),
634 DEFINE_BIT(BRW_NEW_VERTICES),
635 DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
636 DEFINE_BIT(BRW_NEW_BATCH),
637 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
638 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
639 DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
640 DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
641 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
642 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
643 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
644 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
645 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
646 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
647 DEFINE_BIT(BRW_NEW_STATS_WM),
648 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
649 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
650 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
651 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
652 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
653 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
654 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
655 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
656 DEFINE_BIT(BRW_NEW_CC_VP),
657 DEFINE_BIT(BRW_NEW_SF_VP),
658 DEFINE_BIT(BRW_NEW_CLIP_VP),
659 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
660 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
661 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
662 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
663 DEFINE_BIT(BRW_NEW_URB_SIZE),
664 DEFINE_BIT(BRW_NEW_CC_STATE),
665 DEFINE_BIT(BRW_NEW_BLORP),
666 DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
667 DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
668 {0, 0, 0}
669 };
670
671 static void
672 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
673 {
674 for (int i = 0; bit_map[i].bit != 0; i++) {
675 if (bit_map[i].bit & bits)
676 bit_map[i].count++;
677 }
678 }
679
680 static void
681 brw_print_dirty_count(struct dirty_bit_map *bit_map)
682 {
683 for (int i = 0; bit_map[i].bit != 0; i++) {
684 if (bit_map[i].count > 1) {
685 fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
686 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
687 }
688 }
689 }
690
691 static inline void
692 brw_upload_tess_programs(struct brw_context *brw)
693 {
694 if (brw->tess_eval_program) {
695 brw_upload_tcs_prog(brw);
696 brw_upload_tes_prog(brw);
697 } else {
698 brw->tcs.base.prog_data = NULL;
699 brw->tes.base.prog_data = NULL;
700 }
701 }
702
703 static inline void
704 brw_upload_programs(struct brw_context *brw,
705 enum brw_pipeline pipeline)
706 {
707 struct gl_context *ctx = &brw->ctx;
708
709 if (pipeline == BRW_RENDER_PIPELINE) {
710 brw_upload_vs_prog(brw);
711 brw_upload_tess_programs(brw);
712
713 if (brw->gen < 6)
714 brw_upload_ff_gs_prog(brw);
715 else
716 brw_upload_gs_prog(brw);
717
718 /* Update the VUE map for data exiting the GS stage of the pipeline.
719 * This comes from the last enabled shader stage.
720 */
721 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
722 bool old_separate = brw->vue_map_geom_out.separate;
723 struct brw_vue_prog_data *vue_prog_data;
724 if (brw->geometry_program)
725 vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
726 else if (brw->tess_eval_program)
727 vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
728 else
729 vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);
730
731 brw->vue_map_geom_out = vue_prog_data->vue_map;
732
733 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
734 if (old_slots != brw->vue_map_geom_out.slots_valid ||
735 old_separate != brw->vue_map_geom_out.separate)
736 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
737
738 if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
739 VARYING_BIT_VIEWPORT) {
740 ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
741 brw->clip.viewport_count =
742 (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
743 ctx->Const.MaxViewports : 1;
744 }
745
746 brw_upload_wm_prog(brw);
747
748 if (brw->gen < 6) {
749 brw_upload_clip_prog(brw);
750 brw_upload_sf_prog(brw);
751 }
752 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
753 brw_upload_cs_prog(brw);
754 }
755 }
756
757 static inline void
758 merge_ctx_state(struct brw_context *brw,
759 struct brw_state_flags *state)
760 {
761 state->mesa |= brw->NewGLState;
762 state->brw |= brw->ctx.NewDriverState;
763 }
764
765 static inline void
766 check_and_emit_atom(struct brw_context *brw,
767 struct brw_state_flags *state,
768 const struct brw_tracked_state *atom)
769 {
770 if (check_state(state, &atom->dirty)) {
771 atom->emit(brw);
772 merge_ctx_state(brw, state);
773 }
774 }
775
776 static inline void
777 brw_upload_pipeline_state(struct brw_context *brw,
778 enum brw_pipeline pipeline)
779 {
780 struct gl_context *ctx = &brw->ctx;
781 int i;
782 static int dirty_count = 0;
783 struct brw_state_flags state = brw->state.pipelines[pipeline];
784 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
785
786 brw_select_pipeline(brw, pipeline);
787
788 if (0) {
789 /* Always re-emit all state. */
790 brw->NewGLState = ~0;
791 ctx->NewDriverState = ~0ull;
792 }
793
794 if (pipeline == BRW_RENDER_PIPELINE) {
795 if (brw->fragment_program != ctx->FragmentProgram._Current) {
796 brw->fragment_program = ctx->FragmentProgram._Current;
797 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
798 }
799
800 if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
801 brw->tess_eval_program = ctx->TessEvalProgram._Current;
802 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
803 }
804
805 if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
806 brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
807 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
808 }
809
810 if (brw->geometry_program != ctx->GeometryProgram._Current) {
811 brw->geometry_program = ctx->GeometryProgram._Current;
812 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
813 }
814
815 if (brw->vertex_program != ctx->VertexProgram._Current) {
816 brw->vertex_program = ctx->VertexProgram._Current;
817 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
818 }
819 }
820
821 if (brw->compute_program != ctx->ComputeProgram._Current) {
822 brw->compute_program = ctx->ComputeProgram._Current;
823 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
824 }
825
826 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
827 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
828 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
829 }
830
831 if (brw->num_samples != fb_samples) {
832 brw->num_samples = fb_samples;
833 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
834 }
835
836 /* Exit early if no state is flagged as dirty */
837 merge_ctx_state(brw, &state);
838 if ((state.mesa | state.brw) == 0)
839 return;
840
841 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
842 if (brw->gen == 6)
843 brw_emit_post_sync_nonzero_flush(brw);
844
845 brw_upload_programs(brw, pipeline);
846 merge_ctx_state(brw, &state);
847
848 brw_upload_state_base_address(brw);
849
850 const struct brw_tracked_state *atoms =
851 brw_get_pipeline_atoms(brw, pipeline);
852 const int num_atoms = brw->num_atoms[pipeline];
853
854 if (unlikely(INTEL_DEBUG)) {
855 /* Debug version which enforces various sanity checks on the
856 * state flags which are generated and checked to help ensure
857 * state atoms are ordered correctly in the list.
858 */
859 struct brw_state_flags examined, prev;
860 memset(&examined, 0, sizeof(examined));
861 prev = state;
862
863 for (i = 0; i < num_atoms; i++) {
864 const struct brw_tracked_state *atom = &atoms[i];
865 struct brw_state_flags generated;
866
867 check_and_emit_atom(brw, &state, atom);
868
869 accumulate_state(&examined, &atom->dirty);
870
871 /* generated = (prev ^ state)
872 * if (examined & generated)
873 * fail;
874 */
875 xor_states(&generated, &prev, &state);
876 assert(!check_state(&examined, &generated));
877 prev = state;
878 }
879 }
880 else {
881 for (i = 0; i < num_atoms; i++) {
882 const struct brw_tracked_state *atom = &atoms[i];
883
884 check_and_emit_atom(brw, &state, atom);
885 }
886 }
887
888 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
889 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
890
891 brw_update_dirty_count(mesa_bits, state.mesa);
892 brw_update_dirty_count(brw_bits, state.brw);
893 if (dirty_count++ % 1000 == 0) {
894 brw_print_dirty_count(mesa_bits);
895 brw_print_dirty_count(brw_bits);
896 fprintf(stderr, "\n");
897 }
898 }
899 }
900
901 /***********************************************************************
902 * Emit all state:
903 */
904 void brw_upload_render_state(struct brw_context *brw)
905 {
906 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
907 }
908
909 static inline void
910 brw_pipeline_state_finished(struct brw_context *brw,
911 enum brw_pipeline pipeline)
912 {
913 /* Save all dirty state into the other pipelines */
914 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
915 if (i != pipeline) {
916 brw->state.pipelines[i].mesa |= brw->NewGLState;
917 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
918 } else {
919 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
920 }
921 }
922
923 brw->NewGLState = 0;
924 brw->ctx.NewDriverState = 0ull;
925 }
926
927 /**
928 * Clear dirty bits to account for the fact that the state emitted by
929 * brw_upload_render_state() has been committed to the hardware. This is a
930 * separate call from brw_upload_render_state() because it's possible that
931 * after the call to brw_upload_render_state(), we will discover that we've
932 * run out of aperture space, and need to rewind the batch buffer to the state
933 * it had before the brw_upload_render_state() call.
934 */
935 void
936 brw_render_state_finished(struct brw_context *brw)
937 {
938 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
939 }
940
941 void
942 brw_upload_compute_state(struct brw_context *brw)
943 {
944 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
945 }
946
947 void
948 brw_compute_state_finished(struct brw_context *brw)
949 {
950 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
951 }