i965: Split brw_upload_texture_surfaces into compute/render atoms.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 &brw_interpolation_map,
49
50 &brw_clip_prog, /* must do before state base address */
51 &brw_sf_prog, /* must do before state base address */
52
53 /* Once all the programs are done, we know how large urb entry
54 * sizes need to be and can decide if we need to change the urb
55 * layout.
56 */
57 &brw_curbe_offsets,
58 &brw_recalculate_urb_fence,
59
60 &brw_cc_vp,
61 &brw_cc_unit,
62
63 /* Surface state setup. Must come before the VS/WM unit. The binding
64 * table upload must be last.
65 */
66 &brw_vs_pull_constants,
67 &brw_wm_pull_constants,
68 &brw_renderbuffer_surfaces,
69 &brw_texture_surfaces,
70 &brw_vs_binding_table,
71 &brw_wm_binding_table,
72
73 &brw_fs_samplers,
74 &brw_vs_samplers,
75
76 /* These set up state for brw_psp_urb_cbs */
77 &brw_wm_unit,
78 &brw_sf_vp,
79 &brw_sf_unit,
80 &brw_vs_unit, /* always required, enabled or not */
81 &brw_clip_unit,
82 &brw_gs_unit,
83
84 /* Command packets:
85 */
86 &brw_invariant_state,
87 &brw_state_base_address,
88
89 &brw_binding_table_pointers,
90 &brw_blend_constant_color,
91
92 &brw_depthbuffer,
93
94 &brw_polygon_stipple,
95 &brw_polygon_stipple_offset,
96
97 &brw_line_stipple,
98 &brw_aa_line_parameters,
99
100 &brw_psp_urb_cbs,
101
102 &brw_drawing_rect,
103 &brw_indices, /* must come before brw_vertices */
104 &brw_index_buffer,
105 &brw_vertices,
106
107 &brw_constant_buffer
108 };
109
110 static const struct brw_tracked_state *gen6_atoms[] =
111 {
112 &gen6_clip_vp,
113 &gen6_sf_vp,
114
115 /* Command packets: */
116
117 /* must do before binding table pointers, cc state ptrs */
118 &brw_state_base_address,
119
120 &brw_cc_vp,
121 &gen6_viewport_state, /* must do after *_vp stages */
122
123 &gen6_urb,
124 &gen6_blend_state, /* must do before cc unit */
125 &gen6_color_calc_state, /* must do before cc unit */
126 &gen6_depth_stencil_state, /* must do before cc unit */
127
128 &gen6_vs_push_constants, /* Before vs_state */
129 &gen6_gs_push_constants, /* Before gs_state */
130 &gen6_wm_push_constants, /* Before wm_state */
131
132 /* Surface state setup. Must come before the VS/WM unit. The binding
133 * table upload must be last.
134 */
135 &brw_vs_pull_constants,
136 &brw_vs_ubo_surfaces,
137 &brw_gs_pull_constants,
138 &brw_gs_ubo_surfaces,
139 &brw_wm_pull_constants,
140 &brw_wm_ubo_surfaces,
141 &gen6_renderbuffer_surfaces,
142 &brw_texture_surfaces,
143 &gen6_sol_surface,
144 &brw_vs_binding_table,
145 &gen6_gs_binding_table,
146 &brw_wm_binding_table,
147
148 &brw_fs_samplers,
149 &brw_vs_samplers,
150 &brw_gs_samplers,
151 &gen6_sampler_state,
152 &gen6_multisample_state,
153
154 &gen6_vs_state,
155 &gen6_gs_state,
156 &gen6_clip_state,
157 &gen6_sf_state,
158 &gen6_wm_state,
159
160 &gen6_scissor_state,
161
162 &gen6_binding_table_pointers,
163
164 &brw_depthbuffer,
165
166 &brw_polygon_stipple,
167 &brw_polygon_stipple_offset,
168
169 &brw_line_stipple,
170 &brw_aa_line_parameters,
171
172 &brw_drawing_rect,
173
174 &brw_indices, /* must come before brw_vertices */
175 &brw_index_buffer,
176 &brw_vertices,
177 };
178
179 static const struct brw_tracked_state *gen7_render_atoms[] =
180 {
181 /* Command packets: */
182
183 /* must do before binding table pointers, cc state ptrs */
184 &brw_state_base_address,
185
186 &brw_cc_vp,
187 &gen7_sf_clip_viewport,
188
189 &gen7_l3_state,
190 &gen7_push_constant_space,
191 &gen7_urb,
192 &gen6_blend_state, /* must do before cc unit */
193 &gen6_color_calc_state, /* must do before cc unit */
194 &gen6_depth_stencil_state, /* must do before cc unit */
195
196 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
197
198 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
199 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
200 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
201 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
202 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
203
204 &gen6_vs_push_constants, /* Before vs_state */
205 &gen7_tcs_push_constants,
206 &gen7_tes_push_constants,
207 &gen6_gs_push_constants, /* Before gs_state */
208 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
209
210 /* Surface state setup. Must come before the VS/WM unit. The binding
211 * table upload must be last.
212 */
213 &brw_vs_pull_constants,
214 &brw_vs_ubo_surfaces,
215 &brw_vs_abo_surfaces,
216 &brw_tcs_pull_constants,
217 &brw_tcs_ubo_surfaces,
218 &brw_tcs_abo_surfaces,
219 &brw_tes_pull_constants,
220 &brw_tes_ubo_surfaces,
221 &brw_tes_abo_surfaces,
222 &brw_gs_pull_constants,
223 &brw_gs_ubo_surfaces,
224 &brw_gs_abo_surfaces,
225 &brw_wm_pull_constants,
226 &brw_wm_ubo_surfaces,
227 &brw_wm_abo_surfaces,
228 &gen6_renderbuffer_surfaces,
229 &brw_texture_surfaces,
230 &brw_vs_binding_table,
231 &brw_tcs_binding_table,
232 &brw_tes_binding_table,
233 &brw_gs_binding_table,
234 &brw_wm_binding_table,
235
236 &brw_fs_samplers,
237 &brw_vs_samplers,
238 &brw_tcs_samplers,
239 &brw_tes_samplers,
240 &brw_gs_samplers,
241 &gen6_multisample_state,
242
243 &gen7_vs_state,
244 &gen7_hs_state,
245 &gen7_te_state,
246 &gen7_ds_state,
247 &gen7_gs_state,
248 &gen7_sol_state,
249 &gen7_clip_state,
250 &gen7_sbe_state,
251 &gen7_sf_state,
252 &gen7_wm_state,
253 &gen7_ps_state,
254
255 &gen6_scissor_state,
256
257 &gen7_depthbuffer,
258
259 &brw_polygon_stipple,
260 &brw_polygon_stipple_offset,
261
262 &brw_line_stipple,
263 &brw_aa_line_parameters,
264
265 &brw_drawing_rect,
266
267 &brw_indices, /* must come before brw_vertices */
268 &brw_index_buffer,
269 &brw_vertices,
270
271 &haswell_cut_index,
272 };
273
274 static const struct brw_tracked_state *gen7_compute_atoms[] =
275 {
276 &brw_state_base_address,
277 &gen7_l3_state,
278 &brw_cs_image_surfaces,
279 &gen7_cs_push_constants,
280 &brw_cs_pull_constants,
281 &brw_cs_ubo_surfaces,
282 &brw_cs_abo_surfaces,
283 &brw_cs_texture_surfaces,
284 &brw_cs_work_groups_surface,
285 &brw_cs_samplers,
286 &brw_cs_state,
287 };
288
289 static const struct brw_tracked_state *gen8_render_atoms[] =
290 {
291 /* Command packets: */
292 &gen8_state_base_address,
293
294 &brw_cc_vp,
295 &gen8_sf_clip_viewport,
296
297 &gen7_l3_state,
298 &gen7_push_constant_space,
299 &gen7_urb,
300 &gen8_blend_state,
301 &gen6_color_calc_state,
302
303 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
304
305 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
306 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
307 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
308 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
309 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
310
311 &gen6_vs_push_constants, /* Before vs_state */
312 &gen7_tcs_push_constants,
313 &gen7_tes_push_constants,
314 &gen6_gs_push_constants, /* Before gs_state */
315 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
316
317 /* Surface state setup. Must come before the VS/WM unit. The binding
318 * table upload must be last.
319 */
320 &brw_vs_pull_constants,
321 &brw_vs_ubo_surfaces,
322 &brw_vs_abo_surfaces,
323 &brw_tcs_pull_constants,
324 &brw_tcs_ubo_surfaces,
325 &brw_tcs_abo_surfaces,
326 &brw_tes_pull_constants,
327 &brw_tes_ubo_surfaces,
328 &brw_tes_abo_surfaces,
329 &brw_gs_pull_constants,
330 &brw_gs_ubo_surfaces,
331 &brw_gs_abo_surfaces,
332 &brw_wm_pull_constants,
333 &brw_wm_ubo_surfaces,
334 &brw_wm_abo_surfaces,
335 &gen6_renderbuffer_surfaces,
336 &brw_texture_surfaces,
337 &brw_vs_binding_table,
338 &brw_tcs_binding_table,
339 &brw_tes_binding_table,
340 &brw_gs_binding_table,
341 &brw_wm_binding_table,
342
343 &brw_fs_samplers,
344 &brw_vs_samplers,
345 &brw_tcs_samplers,
346 &brw_tes_samplers,
347 &brw_gs_samplers,
348 &gen8_multisample_state,
349
350 &gen8_disable_stages,
351 &gen8_vs_state,
352 &gen8_hs_state,
353 &gen7_te_state,
354 &gen8_ds_state,
355 &gen8_gs_state,
356 &gen8_sol_state,
357 &gen6_clip_state,
358 &gen8_raster_state,
359 &gen8_sbe_state,
360 &gen8_sf_state,
361 &gen8_ps_blend,
362 &gen8_ps_extra,
363 &gen8_ps_state,
364 &gen8_wm_depth_stencil,
365 &gen8_wm_state,
366
367 &gen6_scissor_state,
368
369 &gen7_depthbuffer,
370
371 &brw_polygon_stipple,
372 &brw_polygon_stipple_offset,
373
374 &brw_line_stipple,
375 &brw_aa_line_parameters,
376
377 &brw_drawing_rect,
378
379 &gen8_vf_topology,
380
381 &brw_indices,
382 &gen8_index_buffer,
383 &gen8_vertices,
384
385 &haswell_cut_index,
386 &gen8_pma_fix,
387 };
388
389 static const struct brw_tracked_state *gen8_compute_atoms[] =
390 {
391 &gen8_state_base_address,
392 &gen7_l3_state,
393 &brw_cs_image_surfaces,
394 &gen7_cs_push_constants,
395 &brw_cs_pull_constants,
396 &brw_cs_ubo_surfaces,
397 &brw_cs_abo_surfaces,
398 &brw_cs_texture_surfaces,
399 &brw_cs_work_groups_surface,
400 &brw_cs_samplers,
401 &brw_cs_state,
402 };
403
404 static void
405 brw_upload_initial_gpu_state(struct brw_context *brw)
406 {
407 /* On platforms with hardware contexts, we can set our initial GPU state
408 * right away rather than doing it via state atoms. This saves a small
409 * amount of overhead on every draw call.
410 */
411 if (!brw->hw_ctx)
412 return;
413
414 if (brw->gen == 6)
415 brw_emit_post_sync_nonzero_flush(brw);
416
417 brw_upload_invariant_state(brw);
418
419 /* Recommended optimization for Victim Cache eviction in pixel backend. */
420 if (brw->gen >= 9) {
421 BEGIN_BATCH(3);
422 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
423 OUT_BATCH(GEN7_CACHE_MODE_1);
424 OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
425 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
426 ADVANCE_BATCH();
427 }
428
429 if (brw->gen >= 8) {
430 gen8_emit_3dstate_sample_pattern(brw);
431 }
432 }
433
434 static inline const struct brw_tracked_state *
435 brw_get_pipeline_atoms(struct brw_context *brw,
436 enum brw_pipeline pipeline)
437 {
438 switch (pipeline) {
439 case BRW_RENDER_PIPELINE:
440 return brw->render_atoms;
441 case BRW_COMPUTE_PIPELINE:
442 return brw->compute_atoms;
443 default:
444 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
445 unreachable("Unsupported pipeline");
446 return NULL;
447 }
448 }
449
450 static void
451 brw_copy_pipeline_atoms(struct brw_context *brw,
452 enum brw_pipeline pipeline,
453 const struct brw_tracked_state **atoms,
454 int num_atoms)
455 {
456 /* This is to work around brw_context::atoms being declared const. We want
457 * it to be const, but it needs to be initialized somehow!
458 */
459 struct brw_tracked_state *context_atoms =
460 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
461
462 for (int i = 0; i < num_atoms; i++) {
463 context_atoms[i] = *atoms[i];
464 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
465 assert(context_atoms[i].emit);
466 }
467
468 brw->num_atoms[pipeline] = num_atoms;
469 }
470
471 void brw_init_state( struct brw_context *brw )
472 {
473 struct gl_context *ctx = &brw->ctx;
474
475 /* Force the first brw_select_pipeline to emit pipeline select */
476 brw->last_pipeline = BRW_NUM_PIPELINES;
477
478 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
479 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
480 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
481 ARRAY_SIZE(brw->render_atoms));
482 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
483 ARRAY_SIZE(brw->render_atoms));
484 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
485 ARRAY_SIZE(brw->compute_atoms));
486 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
487 ARRAY_SIZE(brw->compute_atoms));
488
489 brw_init_caches(brw);
490
491 if (brw->gen >= 8) {
492 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
493 gen8_render_atoms,
494 ARRAY_SIZE(gen8_render_atoms));
495 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
496 gen8_compute_atoms,
497 ARRAY_SIZE(gen8_compute_atoms));
498 } else if (brw->gen == 7) {
499 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
500 gen7_render_atoms,
501 ARRAY_SIZE(gen7_render_atoms));
502 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
503 gen7_compute_atoms,
504 ARRAY_SIZE(gen7_compute_atoms));
505 } else if (brw->gen == 6) {
506 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
507 gen6_atoms, ARRAY_SIZE(gen6_atoms));
508 } else {
509 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
510 gen4_atoms, ARRAY_SIZE(gen4_atoms));
511 }
512
513 brw_upload_initial_gpu_state(brw);
514
515 brw->NewGLState = ~0;
516 brw->ctx.NewDriverState = ~0ull;
517
518 /* ~0 is a nonsensical value which won't match anything we program, so
519 * the programming will take effect on the first time around.
520 */
521 brw->pma_stall_bits = ~0;
522
523 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
524 * dirty flags.
525 */
526 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
527
528 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
529 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
530 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
531 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
532 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
533 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
534 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
535 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
536 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
537 }
538
539
/**
 * Tear down the state-upload machinery of a context; this just destroys
 * the caches created by brw_init_state().
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
544
545 /***********************************************************************
546 */
547
548 static bool
549 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
550 {
551 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
552 }
553
554 static void accumulate_state( struct brw_state_flags *a,
555 const struct brw_state_flags *b )
556 {
557 a->mesa |= b->mesa;
558 a->brw |= b->brw;
559 }
560
561
562 static void xor_states( struct brw_state_flags *result,
563 const struct brw_state_flags *a,
564 const struct brw_state_flags *b )
565 {
566 result->mesa = a->mesa ^ b->mesa;
567 result->brw = a->brw ^ b->brw;
568 }
569
/**
 * One row of the dirty-flag statistics tables: a flag bit, its printable
 * name and the number of times it has been seen set.
 */
struct dirty_bit_map {
   uint64_t bit;        /* flag value; 0 terminates a table */
   const char *name;    /* always points at a string literal, hence const */
   uint32_t count;      /* bumped by brw_update_dirty_count() */
};

/* Build a table row from a flag macro, using its spelling as the name. */
#define DEFINE_BIT(name) {name, #name, 0}
577
578 static struct dirty_bit_map mesa_bits[] = {
579 DEFINE_BIT(_NEW_MODELVIEW),
580 DEFINE_BIT(_NEW_PROJECTION),
581 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
582 DEFINE_BIT(_NEW_COLOR),
583 DEFINE_BIT(_NEW_DEPTH),
584 DEFINE_BIT(_NEW_EVAL),
585 DEFINE_BIT(_NEW_FOG),
586 DEFINE_BIT(_NEW_HINT),
587 DEFINE_BIT(_NEW_LIGHT),
588 DEFINE_BIT(_NEW_LINE),
589 DEFINE_BIT(_NEW_PIXEL),
590 DEFINE_BIT(_NEW_POINT),
591 DEFINE_BIT(_NEW_POLYGON),
592 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
593 DEFINE_BIT(_NEW_SCISSOR),
594 DEFINE_BIT(_NEW_STENCIL),
595 DEFINE_BIT(_NEW_TEXTURE),
596 DEFINE_BIT(_NEW_TRANSFORM),
597 DEFINE_BIT(_NEW_VIEWPORT),
598 DEFINE_BIT(_NEW_ARRAY),
599 DEFINE_BIT(_NEW_RENDERMODE),
600 DEFINE_BIT(_NEW_BUFFERS),
601 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
602 DEFINE_BIT(_NEW_MULTISAMPLE),
603 DEFINE_BIT(_NEW_TRACK_MATRIX),
604 DEFINE_BIT(_NEW_PROGRAM),
605 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
606 DEFINE_BIT(_NEW_BUFFER_OBJECT),
607 DEFINE_BIT(_NEW_FRAG_CLAMP),
608 /* Avoid sign extension problems. */
609 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
610 {0, 0, 0}
611 };
612
613 static struct dirty_bit_map brw_bits[] = {
614 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
615 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
616 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
617 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
618 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
619 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
620 DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
621 DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
622 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
623 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
624 DEFINE_BIT(BRW_NEW_URB_FENCE),
625 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
626 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
627 DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
628 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
629 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
630 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
631 DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
632 DEFINE_BIT(BRW_NEW_PRIMITIVE),
633 DEFINE_BIT(BRW_NEW_CONTEXT),
634 DEFINE_BIT(BRW_NEW_PSP),
635 DEFINE_BIT(BRW_NEW_SURFACES),
636 DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
637 DEFINE_BIT(BRW_NEW_INDICES),
638 DEFINE_BIT(BRW_NEW_VERTICES),
639 DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
640 DEFINE_BIT(BRW_NEW_BATCH),
641 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
642 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
643 DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
644 DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
645 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
646 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
647 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
648 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
649 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
650 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
651 DEFINE_BIT(BRW_NEW_STATS_WM),
652 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
653 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
654 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
655 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
656 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
657 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
658 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
659 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
660 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
661 DEFINE_BIT(BRW_NEW_CC_VP),
662 DEFINE_BIT(BRW_NEW_SF_VP),
663 DEFINE_BIT(BRW_NEW_CLIP_VP),
664 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
665 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
666 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
667 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
668 DEFINE_BIT(BRW_NEW_URB_SIZE),
669 DEFINE_BIT(BRW_NEW_CC_STATE),
670 {0, 0, 0}
671 };
672
673 static void
674 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
675 {
676 for (int i = 0; bit_map[i].bit != 0; i++) {
677 if (bit_map[i].bit & bits)
678 bit_map[i].count++;
679 }
680 }
681
682 static void
683 brw_print_dirty_count(struct dirty_bit_map *bit_map)
684 {
685 for (int i = 0; bit_map[i].bit != 0; i++) {
686 if (bit_map[i].count > 1) {
687 fprintf(stderr, "0x%016lx: %12d (%s)\n",
688 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
689 }
690 }
691 }
692
693 static inline void
694 brw_upload_tess_programs(struct brw_context *brw)
695 {
696 if (brw->tess_eval_program) {
697 uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
698 uint32_t per_patch_slots =
699 brw->tess_eval_program->Base.PatchInputsRead;
700
701 /* The TCS may have additional outputs which aren't read by the
702 * TES (possibly for cross-thread communication). These need to
703 * be stored in the Patch URB Entry as well.
704 */
705 if (brw->tess_ctrl_program) {
706 per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
707 per_patch_slots |=
708 brw->tess_ctrl_program->Base.PatchOutputsWritten;
709 }
710
711 brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
712 brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
713 } else {
714 brw->tcs.prog_data = NULL;
715 brw->tcs.base.prog_data = NULL;
716 brw->tes.prog_data = NULL;
717 brw->tes.base.prog_data = NULL;
718 }
719 }
720
721 static inline void
722 brw_upload_programs(struct brw_context *brw,
723 enum brw_pipeline pipeline)
724 {
725 if (pipeline == BRW_RENDER_PIPELINE) {
726 brw_upload_vs_prog(brw);
727 brw_upload_tess_programs(brw);
728
729 if (brw->gen < 6)
730 brw_upload_ff_gs_prog(brw);
731 else
732 brw_upload_gs_prog(brw);
733
734 /* Update the VUE map for data exiting the GS stage of the pipeline.
735 * This comes from the last enabled shader stage.
736 */
737 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
738 bool old_separate = brw->vue_map_geom_out.separate;
739 if (brw->geometry_program)
740 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
741 else if (brw->tess_eval_program)
742 brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
743 else
744 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
745
746 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
747 if (old_slots != brw->vue_map_geom_out.slots_valid ||
748 old_separate != brw->vue_map_geom_out.separate)
749 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
750
751 brw_upload_wm_prog(brw);
752 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
753 brw_upload_cs_prog(brw);
754 }
755 }
756
757 static inline void
758 merge_ctx_state(struct brw_context *brw,
759 struct brw_state_flags *state)
760 {
761 state->mesa |= brw->NewGLState;
762 state->brw |= brw->ctx.NewDriverState;
763 }
764
765 static inline void
766 check_and_emit_atom(struct brw_context *brw,
767 struct brw_state_flags *state,
768 const struct brw_tracked_state *atom)
769 {
770 if (check_state(state, &atom->dirty)) {
771 atom->emit(brw);
772 merge_ctx_state(brw, state);
773 }
774 }
775
776 static inline void
777 brw_upload_pipeline_state(struct brw_context *brw,
778 enum brw_pipeline pipeline)
779 {
780 struct gl_context *ctx = &brw->ctx;
781 int i;
782 static int dirty_count = 0;
783 struct brw_state_flags state = brw->state.pipelines[pipeline];
784 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
785
786 brw_select_pipeline(brw, pipeline);
787
788 if (0) {
789 /* Always re-emit all state. */
790 brw->NewGLState = ~0;
791 ctx->NewDriverState = ~0ull;
792 }
793
794 if (pipeline == BRW_RENDER_PIPELINE) {
795 if (brw->fragment_program != ctx->FragmentProgram._Current) {
796 brw->fragment_program = ctx->FragmentProgram._Current;
797 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
798 }
799
800 if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
801 brw->tess_eval_program = ctx->TessEvalProgram._Current;
802 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
803 }
804
805 if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
806 brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
807 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
808 }
809
810 if (brw->geometry_program != ctx->GeometryProgram._Current) {
811 brw->geometry_program = ctx->GeometryProgram._Current;
812 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
813 }
814
815 if (brw->vertex_program != ctx->VertexProgram._Current) {
816 brw->vertex_program = ctx->VertexProgram._Current;
817 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
818 }
819 }
820
821 if (brw->compute_program != ctx->ComputeProgram._Current) {
822 brw->compute_program = ctx->ComputeProgram._Current;
823 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
824 }
825
826 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
827 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
828 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
829 }
830
831 if (brw->num_samples != fb_samples) {
832 brw->num_samples = fb_samples;
833 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
834 }
835
836 /* Exit early if no state is flagged as dirty */
837 merge_ctx_state(brw, &state);
838 if ((state.mesa | state.brw) == 0)
839 return;
840
841 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
842 if (brw->gen == 6)
843 brw_emit_post_sync_nonzero_flush(brw);
844
845 brw_upload_programs(brw, pipeline);
846 merge_ctx_state(brw, &state);
847
848 const struct brw_tracked_state *atoms =
849 brw_get_pipeline_atoms(brw, pipeline);
850 const int num_atoms = brw->num_atoms[pipeline];
851
852 if (unlikely(INTEL_DEBUG)) {
853 /* Debug version which enforces various sanity checks on the
854 * state flags which are generated and checked to help ensure
855 * state atoms are ordered correctly in the list.
856 */
857 struct brw_state_flags examined, prev;
858 memset(&examined, 0, sizeof(examined));
859 prev = state;
860
861 for (i = 0; i < num_atoms; i++) {
862 const struct brw_tracked_state *atom = &atoms[i];
863 struct brw_state_flags generated;
864
865 check_and_emit_atom(brw, &state, atom);
866
867 accumulate_state(&examined, &atom->dirty);
868
869 /* generated = (prev ^ state)
870 * if (examined & generated)
871 * fail;
872 */
873 xor_states(&generated, &prev, &state);
874 assert(!check_state(&examined, &generated));
875 prev = state;
876 }
877 }
878 else {
879 for (i = 0; i < num_atoms; i++) {
880 const struct brw_tracked_state *atom = &atoms[i];
881
882 check_and_emit_atom(brw, &state, atom);
883 }
884 }
885
886 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
887 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
888
889 brw_update_dirty_count(mesa_bits, state.mesa);
890 brw_update_dirty_count(brw_bits, state.brw);
891 if (dirty_count++ % 1000 == 0) {
892 brw_print_dirty_count(mesa_bits);
893 brw_print_dirty_count(brw_bits);
894 fprintf(stderr, "\n");
895 }
896 }
897 }
898
899 /***********************************************************************
900 * Emit all state:
901 */
902 void brw_upload_render_state(struct brw_context *brw)
903 {
904 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
905 }
906
907 static inline void
908 brw_pipeline_state_finished(struct brw_context *brw,
909 enum brw_pipeline pipeline)
910 {
911 /* Save all dirty state into the other pipelines */
912 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
913 if (i != pipeline) {
914 brw->state.pipelines[i].mesa |= brw->NewGLState;
915 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
916 } else {
917 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
918 }
919 }
920
921 brw->NewGLState = 0;
922 brw->ctx.NewDriverState = 0ull;
923 }
924
925 /**
926 * Clear dirty bits to account for the fact that the state emitted by
927 * brw_upload_render_state() has been committed to the hardware. This is a
928 * separate call from brw_upload_render_state() because it's possible that
929 * after the call to brw_upload_render_state(), we will discover that we've
930 * run out of aperture space, and need to rewind the batch buffer to the state
931 * it had before the brw_upload_render_state() call.
932 */
933 void
934 brw_render_state_finished(struct brw_context *brw)
935 {
936 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
937 }
938
939 void
940 brw_upload_compute_state(struct brw_context *brw)
941 {
942 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
943 }
944
945 void
946 brw_compute_state_finished(struct brw_context *brw)
947 {
948 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
949 }