i965: Merge gen7_clip_state atom into gen6_clip_state atom.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "brw_context.h"
35 #include "brw_state.h"
36 #include "drivers/common/meta.h"
37 #include "intel_batchbuffer.h"
38 #include "intel_buffers.h"
39 #include "brw_vs.h"
40 #include "brw_ff_gs.h"
41 #include "brw_gs.h"
42 #include "brw_wm.h"
43 #include "brw_cs.h"
44 #include "main/framebuffer.h"
45
/*
 * Render-pipeline state atoms installed by brw_init_state() when brw->gen
 * is below 6 (the final else-branch of its generation dispatch).
 *
 * brw_upload_pipeline_state() walks the installed list from index 0
 * upwards, so the position of every entry is significant; the ordering
 * notes embedded in the list describe the known constraints.
 */
46 static const struct brw_tracked_state *gen4_atoms[] =
47 {
48 /* Once all the programs are done, we know how large urb entry
49 * sizes need to be and can decide if we need to change the urb
50 * layout.
51 */
52 &brw_curbe_offsets,
53 &brw_recalculate_urb_fence,
54
55 &brw_cc_vp,
56 &brw_cc_unit,
57
58 /* Surface state setup. Must come before the VS/WM unit. The binding
59 * table upload must be last.
60 */
61 &brw_vs_pull_constants,
62 &brw_wm_pull_constants,
63 &brw_renderbuffer_surfaces,
64 &brw_renderbuffer_read_surfaces,
65 &brw_texture_surfaces,
66 &brw_vs_binding_table,
67 &brw_wm_binding_table,
68
69 &brw_fs_samplers,
70 &brw_vs_samplers,
71
72 /* These set up state for brw_psp_urb_cbs */
73 &brw_wm_unit,
74 &brw_sf_vp,
75 &brw_sf_unit,
76 &brw_vs_unit, /* always required, enabled or not */
77 &brw_clip_unit,
78 &brw_gs_unit,
79
80 /* Command packets:
81 */
82 &brw_invariant_state,
83
84 &brw_binding_table_pointers,
85 &brw_blend_constant_color,
86
87 &brw_depthbuffer,
88
89 &brw_polygon_stipple,
90 &brw_polygon_stipple_offset,
91
92 &brw_line_stipple,
93 &brw_aa_line_parameters,
94
95 &brw_psp_urb_cbs,
96
97 &brw_drawing_rect,
98 &brw_indices, /* must come before brw_vertices */
99 &brw_index_buffer,
100 &brw_vertices,
101
102 &brw_constant_buffer
103 };
104
/*
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 6.  No compute list is installed for this generation.
 *
 * Entries are processed in array order by brw_upload_pipeline_state(), so
 * the "before"/"after" remarks on individual entries are ordering
 * requirements, not just commentary.
 */
105 static const struct brw_tracked_state *gen6_atoms[] =
106 {
107 &gen6_clip_vp,
108 &gen6_sf_vp,
109
110 /* Command packets: */
111
112 &brw_cc_vp,
113 &gen6_viewport_state, /* must do after *_vp stages */
114
115 &gen6_urb,
116 &gen6_blend_state, /* must do before cc unit */
117 &gen6_color_calc_state, /* must do before cc unit */
118 &gen6_depth_stencil_state, /* must do before cc unit */
119
120 &gen6_vs_push_constants, /* Before vs_state */
121 &gen6_gs_push_constants, /* Before gs_state */
122 &gen6_wm_push_constants, /* Before wm_state */
123
124 /* Surface state setup. Must come before the VS/WM unit. The binding
125 * table upload must be last.
126 */
127 &brw_vs_pull_constants,
128 &brw_vs_ubo_surfaces,
129 &brw_gs_pull_constants,
130 &brw_gs_ubo_surfaces,
131 &brw_wm_pull_constants,
132 &brw_wm_ubo_surfaces,
133 &gen6_renderbuffer_surfaces,
134 &brw_renderbuffer_read_surfaces,
135 &brw_texture_surfaces,
136 &gen6_sol_surface,
137 &brw_vs_binding_table,
138 &gen6_gs_binding_table,
139 &brw_wm_binding_table,
140
141 &brw_fs_samplers,
142 &brw_vs_samplers,
143 &brw_gs_samplers,
144 &gen6_sampler_state,
145 &gen6_multisample_state,
146
147 &gen6_vs_state,
148 &gen6_gs_state,
149 &gen6_clip_state,
150 &gen6_sf_state,
151 &gen6_wm_state,
152
153 &gen6_scissor_state,
154
155 &gen6_binding_table_pointers,
156
157 &brw_depthbuffer,
158
159 &brw_polygon_stipple,
160 &brw_polygon_stipple_offset,
161
162 &brw_line_stipple,
163 &brw_aa_line_parameters,
164
165 &brw_drawing_rect,
166
167 &brw_indices, /* must come before brw_vertices */
168 &brw_index_buffer,
169 &brw_vertices,
170 };
171
/*
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 7 (paired with gen7_compute_atoms for the compute pipeline).
 *
 * Entries are processed in array order by brw_upload_pipeline_state(); the
 * per-entry "Before ..." remarks record the ordering each atom relies on.
 */
172 static const struct brw_tracked_state *gen7_render_atoms[] =
173 {
174 /* Command packets: */
175
176 &brw_cc_vp,
177 &gen7_sf_clip_viewport,
178
179 &gen7_l3_state,
180 &gen7_push_constant_space,
181 &gen7_urb,
182 &gen6_blend_state, /* must do before cc unit */
183 &gen6_color_calc_state, /* must do before cc unit */
184 &gen6_depth_stencil_state, /* must do before cc unit */
185
186 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */
187
188 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
189 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
190 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
191 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
192 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
193
194 &gen6_vs_push_constants, /* Before vs_state */
195 &gen7_tcs_push_constants,
196 &gen7_tes_push_constants,
197 &gen6_gs_push_constants, /* Before gs_state */
198 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
199
200 /* Surface state setup. Must come before the VS/WM unit. The binding
201 * table upload must be last.
202 */
203 &brw_vs_pull_constants,
204 &brw_vs_ubo_surfaces,
205 &brw_vs_abo_surfaces,
206 &brw_tcs_pull_constants,
207 &brw_tcs_ubo_surfaces,
208 &brw_tcs_abo_surfaces,
209 &brw_tes_pull_constants,
210 &brw_tes_ubo_surfaces,
211 &brw_tes_abo_surfaces,
212 &brw_gs_pull_constants,
213 &brw_gs_ubo_surfaces,
214 &brw_gs_abo_surfaces,
215 &brw_wm_pull_constants,
216 &brw_wm_ubo_surfaces,
217 &brw_wm_abo_surfaces,
218 &gen6_renderbuffer_surfaces,
219 &brw_renderbuffer_read_surfaces,
220 &brw_texture_surfaces,
221 &brw_vs_binding_table,
222 &brw_tcs_binding_table,
223 &brw_tes_binding_table,
224 &brw_gs_binding_table,
225 &brw_wm_binding_table,
226
227 &brw_fs_samplers,
228 &brw_vs_samplers,
229 &brw_tcs_samplers,
230 &brw_tes_samplers,
231 &brw_gs_samplers,
232 &gen6_multisample_state,
233
234 &gen7_vs_state,
235 &gen7_hs_state,
236 &gen7_te_state,
237 &gen7_ds_state,
238 &gen7_gs_state,
239 &gen7_sol_state,
240 &gen6_clip_state,
241 &gen7_sbe_state,
242 &gen7_sf_state,
243 &gen7_wm_state,
244 &gen7_ps_state,
245
246 &gen6_scissor_state,
247
248 &gen7_depthbuffer,
249
250 &brw_polygon_stipple,
251 &brw_polygon_stipple_offset,
252
253 &brw_line_stipple,
254 &brw_aa_line_parameters,
255
256 &brw_drawing_rect,
257
258 &brw_indices, /* must come before brw_vertices */
259 &brw_index_buffer,
260 &brw_vertices,
261
262 &haswell_cut_index,
263 };
264
/*
 * Compute-pipeline state atoms installed by brw_init_state() when
 * brw->gen == 7.  Processed in array order by brw_upload_pipeline_state().
 * The entries currently match gen8_compute_atoms one for one.
 */
265 static const struct brw_tracked_state *gen7_compute_atoms[] =
266 {
267 &gen7_l3_state,
268 &brw_cs_image_surfaces,
269 &gen7_cs_push_constants,
270 &brw_cs_pull_constants,
271 &brw_cs_ubo_surfaces,
272 &brw_cs_abo_surfaces,
273 &brw_cs_texture_surfaces,
274 &brw_cs_work_groups_surface,
275 &brw_cs_samplers,
276 &brw_cs_state,
277 };
278
/*
 * Render-pipeline state atoms installed by brw_init_state() when
 * brw->gen >= 8 (paired with gen8_compute_atoms for the compute pipeline).
 *
 * Entries are processed in array order by brw_upload_pipeline_state(); the
 * per-entry "Before ..." remarks record the ordering each atom relies on.
 */
279 static const struct brw_tracked_state *gen8_render_atoms[] =
280 {
281 &brw_cc_vp,
282 &gen8_sf_clip_viewport,
283
284 &gen7_l3_state,
285 &gen7_push_constant_space,
286 &gen7_urb,
287 &gen8_blend_state,
288 &gen6_color_calc_state,
289
290 &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */
291
292 &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
293 &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
294 &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
295 &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
296 &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */
297
298 &gen6_vs_push_constants, /* Before vs_state */
299 &gen7_tcs_push_constants,
300 &gen7_tes_push_constants,
301 &gen6_gs_push_constants, /* Before gs_state */
302 &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
303
304 /* Surface state setup. Must come before the VS/WM unit. The binding
305 * table upload must be last.
306 */
307 &brw_vs_pull_constants,
308 &brw_vs_ubo_surfaces,
309 &brw_vs_abo_surfaces,
310 &brw_tcs_pull_constants,
311 &brw_tcs_ubo_surfaces,
312 &brw_tcs_abo_surfaces,
313 &brw_tes_pull_constants,
314 &brw_tes_ubo_surfaces,
315 &brw_tes_abo_surfaces,
316 &brw_gs_pull_constants,
317 &brw_gs_ubo_surfaces,
318 &brw_gs_abo_surfaces,
319 &brw_wm_pull_constants,
320 &brw_wm_ubo_surfaces,
321 &brw_wm_abo_surfaces,
322 &gen6_renderbuffer_surfaces,
323 &brw_renderbuffer_read_surfaces,
324 &brw_texture_surfaces,
325 &brw_vs_binding_table,
326 &brw_tcs_binding_table,
327 &brw_tes_binding_table,
328 &brw_gs_binding_table,
329 &brw_wm_binding_table,
330
331 &brw_fs_samplers,
332 &brw_vs_samplers,
333 &brw_tcs_samplers,
334 &brw_tes_samplers,
335 &brw_gs_samplers,
336 &gen8_multisample_state,
337
338 &gen8_disable_stages,
339 &gen8_vs_state,
340 &gen8_hs_state,
341 &gen7_te_state,
342 &gen8_ds_state,
343 &gen8_gs_state,
344 &gen7_sol_state,
345 &gen6_clip_state,
346 &gen8_raster_state,
347 &gen8_sbe_state,
348 &gen8_sf_state,
349 &gen8_ps_blend,
350 &gen8_ps_extra,
351 &gen8_ps_state,
352 &gen8_wm_depth_stencil,
353 &gen8_wm_state,
354
355 &gen6_scissor_state,
356
357 &gen7_depthbuffer,
358
359 &brw_polygon_stipple,
360 &brw_polygon_stipple_offset,
361
362 &brw_line_stipple,
363 &brw_aa_line_parameters,
364
365 &brw_drawing_rect,
366
367 &gen8_vf_topology,
368
369 &brw_indices,
370 &gen8_index_buffer,
371 &gen8_vertices,
372
373 &haswell_cut_index,
374 &gen8_pma_fix,
375 };
376
/*
 * Compute-pipeline state atoms installed by brw_init_state() when
 * brw->gen >= 8.  Processed in array order by brw_upload_pipeline_state().
 * The entries currently match gen7_compute_atoms one for one.
 */
377 static const struct brw_tracked_state *gen8_compute_atoms[] =
378 {
379 &gen7_l3_state,
380 &brw_cs_image_surfaces,
381 &gen7_cs_push_constants,
382 &brw_cs_pull_constants,
383 &brw_cs_ubo_surfaces,
384 &brw_cs_abo_surfaces,
385 &brw_cs_texture_surfaces,
386 &brw_cs_work_groups_surface,
387 &brw_cs_samplers,
388 &brw_cs_state,
389 };
390
391 static void
392 brw_upload_initial_gpu_state(struct brw_context *brw)
393 {
394 /* On platforms with hardware contexts, we can set our initial GPU state
395 * right away rather than doing it via state atoms. This saves a small
396 * amount of overhead on every draw call.
397 */
398 if (!brw->hw_ctx)
399 return;
400
401 if (brw->gen == 6)
402 brw_emit_post_sync_nonzero_flush(brw);
403
404 brw_upload_invariant_state(brw);
405
406 /* Recommended optimization for Victim Cache eviction in pixel backend. */
407 if (brw->gen >= 9) {
408 BEGIN_BATCH(3);
409 OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
410 OUT_BATCH(GEN7_CACHE_MODE_1);
411 OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
412 GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
413 ADVANCE_BATCH();
414 }
415
416 if (brw->gen >= 8) {
417 gen8_emit_3dstate_sample_pattern(brw);
418 }
419 }
420
421 static inline const struct brw_tracked_state *
422 brw_get_pipeline_atoms(struct brw_context *brw,
423 enum brw_pipeline pipeline)
424 {
425 switch (pipeline) {
426 case BRW_RENDER_PIPELINE:
427 return brw->render_atoms;
428 case BRW_COMPUTE_PIPELINE:
429 return brw->compute_atoms;
430 default:
431 STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
432 unreachable("Unsupported pipeline");
433 return NULL;
434 }
435 }
436
437 static void
438 brw_copy_pipeline_atoms(struct brw_context *brw,
439 enum brw_pipeline pipeline,
440 const struct brw_tracked_state **atoms,
441 int num_atoms)
442 {
443 /* This is to work around brw_context::atoms being declared const. We want
444 * it to be const, but it needs to be initialized somehow!
445 */
446 struct brw_tracked_state *context_atoms =
447 (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);
448
449 for (int i = 0; i < num_atoms; i++) {
450 context_atoms[i] = *atoms[i];
451 assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
452 assert(context_atoms[i].emit);
453 }
454
455 brw->num_atoms[pipeline] = num_atoms;
456 }
457
458 void brw_init_state( struct brw_context *brw )
459 {
460 struct gl_context *ctx = &brw->ctx;
461
462 /* Force the first brw_select_pipeline to emit pipeline select */
463 brw->last_pipeline = BRW_NUM_PIPELINES;
464
465 STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
466 STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
467 STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
468 ARRAY_SIZE(brw->render_atoms));
469 STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
470 ARRAY_SIZE(brw->render_atoms));
471 STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
472 ARRAY_SIZE(brw->compute_atoms));
473 STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
474 ARRAY_SIZE(brw->compute_atoms));
475
476 brw_init_caches(brw);
477
478 if (brw->gen >= 8) {
479 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
480 gen8_render_atoms,
481 ARRAY_SIZE(gen8_render_atoms));
482 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
483 gen8_compute_atoms,
484 ARRAY_SIZE(gen8_compute_atoms));
485 } else if (brw->gen == 7) {
486 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
487 gen7_render_atoms,
488 ARRAY_SIZE(gen7_render_atoms));
489 brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
490 gen7_compute_atoms,
491 ARRAY_SIZE(gen7_compute_atoms));
492 } else if (brw->gen == 6) {
493 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
494 gen6_atoms, ARRAY_SIZE(gen6_atoms));
495 } else {
496 brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
497 gen4_atoms, ARRAY_SIZE(gen4_atoms));
498 }
499
500 brw_upload_initial_gpu_state(brw);
501
502 brw->NewGLState = ~0;
503 brw->ctx.NewDriverState = ~0ull;
504
505 /* ~0 is a nonsensical value which won't match anything we program, so
506 * the programming will take effect on the first time around.
507 */
508 brw->pma_stall_bits = ~0;
509
510 /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
511 * dirty flags.
512 */
513 STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
514
515 ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
516 ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
517 ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
518 ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
519 ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
520 ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
521 ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
522 ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
523 ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
524 }
525
526
/**
 * Tear down the state-tracking side of a context by destroying its caches
 * (the counterpart of the brw_init_caches() call in brw_init_state()).
 */
void
brw_destroy_state(struct brw_context *brw)
{
   brw_destroy_caches(brw);
}
531
532 /***********************************************************************
533 */
534
535 static bool
536 check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
537 {
538 return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
539 }
540
541 static void accumulate_state( struct brw_state_flags *a,
542 const struct brw_state_flags *b )
543 {
544 a->mesa |= b->mesa;
545 a->brw |= b->brw;
546 }
547
548
549 static void xor_states( struct brw_state_flags *result,
550 const struct brw_state_flags *a,
551 const struct brw_state_flags *b )
552 {
553 result->mesa = a->mesa ^ b->mesa;
554 result->brw = a->brw ^ b->brw;
555 }
556
/**
 * One row of a dirty-bit statistics table.
 *
 * Tables of these rows end with an all-zero entry; the helpers that walk
 * them stop at the first row whose \c bit is 0.
 */
struct dirty_bit_map {
   uint64_t bit;       /* the flag value this row tracks */
   const char *name;   /* printable flag name; only ever set from string
                        * literals, hence pointer-to-const */
   uint32_t count;     /* how many times the flag has been seen set */
};
562
/* Build one dirty_bit_map row from a flag macro: the flag's value, its
 * spelling as a string, and a zeroed counter.  The parameter is not called
 * "name" so that it cannot be substituted into the .name designator.
 */
#define DEFINE_BIT(flag) { .bit = (flag), .name = #flag, .count = 0 }
564
/*
 * Statistics table for core-Mesa (_NEW_*) dirty flags.  The counters are
 * bumped by brw_update_dirty_count() and reported by
 * brw_print_dirty_count() from the DEBUG_STATE path of
 * brw_upload_pipeline_state().  The trailing all-zero row terminates the
 * table.
 */
565 static struct dirty_bit_map mesa_bits[] = {
566 DEFINE_BIT(_NEW_MODELVIEW),
567 DEFINE_BIT(_NEW_PROJECTION),
568 DEFINE_BIT(_NEW_TEXTURE_MATRIX),
569 DEFINE_BIT(_NEW_COLOR),
570 DEFINE_BIT(_NEW_DEPTH),
571 DEFINE_BIT(_NEW_EVAL),
572 DEFINE_BIT(_NEW_FOG),
573 DEFINE_BIT(_NEW_HINT),
574 DEFINE_BIT(_NEW_LIGHT),
575 DEFINE_BIT(_NEW_LINE),
576 DEFINE_BIT(_NEW_PIXEL),
577 DEFINE_BIT(_NEW_POINT),
578 DEFINE_BIT(_NEW_POLYGON),
579 DEFINE_BIT(_NEW_POLYGONSTIPPLE),
580 DEFINE_BIT(_NEW_SCISSOR),
581 DEFINE_BIT(_NEW_STENCIL),
582 DEFINE_BIT(_NEW_TEXTURE),
583 DEFINE_BIT(_NEW_TRANSFORM),
584 DEFINE_BIT(_NEW_VIEWPORT),
585 DEFINE_BIT(_NEW_ARRAY),
586 DEFINE_BIT(_NEW_RENDERMODE),
587 DEFINE_BIT(_NEW_BUFFERS),
588 DEFINE_BIT(_NEW_CURRENT_ATTRIB),
589 DEFINE_BIT(_NEW_MULTISAMPLE),
590 DEFINE_BIT(_NEW_TRACK_MATRIX),
591 DEFINE_BIT(_NEW_PROGRAM),
592 DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
593 DEFINE_BIT(_NEW_BUFFER_OBJECT),
594 DEFINE_BIT(_NEW_FRAG_CLAMP),
595 /* Avoid sign extension problems. */
596 {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
597 {0, 0, 0}
598 };
599
/*
 * Statistics table for driver (BRW_NEW_*) dirty flags, used the same way
 * as mesa_bits.  brw_upload_pipeline_state() statically asserts that this
 * array has exactly BRW_NUM_STATE_BITS + 1 rows (one per flag plus the
 * all-zero terminator), so a row must be added here whenever a new
 * BRW_NEW_* bit is introduced.
 */
600 static struct dirty_bit_map brw_bits[] = {
601 DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
602 DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
603 DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
604 DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
605 DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
606 DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
607 DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
608 DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
609 DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
610 DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
611 DEFINE_BIT(BRW_NEW_URB_FENCE),
612 DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
613 DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
614 DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
615 DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
616 DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
617 DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
618 DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
619 DEFINE_BIT(BRW_NEW_PRIMITIVE),
620 DEFINE_BIT(BRW_NEW_CONTEXT),
621 DEFINE_BIT(BRW_NEW_PSP),
622 DEFINE_BIT(BRW_NEW_SURFACES),
623 DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
624 DEFINE_BIT(BRW_NEW_INDICES),
625 DEFINE_BIT(BRW_NEW_VERTICES),
626 DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
627 DEFINE_BIT(BRW_NEW_BATCH),
628 DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
629 DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
630 DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
631 DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
632 DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
633 DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
634 DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
635 DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
636 DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
637 DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
638 DEFINE_BIT(BRW_NEW_STATS_WM),
639 DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
640 DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
641 DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
642 DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
643 DEFINE_BIT(BRW_NEW_INTERPOLATION_MAP),
644 DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
645 DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
646 DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
647 DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
648 DEFINE_BIT(BRW_NEW_CC_VP),
649 DEFINE_BIT(BRW_NEW_SF_VP),
650 DEFINE_BIT(BRW_NEW_CLIP_VP),
651 DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
652 DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
653 DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
654 DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
655 DEFINE_BIT(BRW_NEW_URB_SIZE),
656 DEFINE_BIT(BRW_NEW_CC_STATE),
657 DEFINE_BIT(BRW_NEW_BLORP),
658 {0, 0, 0}
659 };
660
661 static void
662 brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
663 {
664 for (int i = 0; bit_map[i].bit != 0; i++) {
665 if (bit_map[i].bit & bits)
666 bit_map[i].count++;
667 }
668 }
669
670 static void
671 brw_print_dirty_count(struct dirty_bit_map *bit_map)
672 {
673 for (int i = 0; bit_map[i].bit != 0; i++) {
674 if (bit_map[i].count > 1) {
675 fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
676 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
677 }
678 }
679 }
680
681 static inline void
682 brw_upload_tess_programs(struct brw_context *brw)
683 {
684 if (brw->tess_eval_program) {
685 uint64_t per_vertex_slots = brw->tess_eval_program->Base.InputsRead;
686 uint32_t per_patch_slots =
687 brw->tess_eval_program->Base.PatchInputsRead;
688
689 /* The TCS may have additional outputs which aren't read by the
690 * TES (possibly for cross-thread communication). These need to
691 * be stored in the Patch URB Entry as well.
692 */
693 if (brw->tess_ctrl_program) {
694 per_vertex_slots |= brw->tess_ctrl_program->Base.OutputsWritten;
695 per_patch_slots |=
696 brw->tess_ctrl_program->Base.PatchOutputsWritten;
697 }
698
699 brw_upload_tcs_prog(brw, per_vertex_slots, per_patch_slots);
700 brw_upload_tes_prog(brw, per_vertex_slots, per_patch_slots);
701 } else {
702 brw->tcs.prog_data = NULL;
703 brw->tcs.base.prog_data = NULL;
704 brw->tes.prog_data = NULL;
705 brw->tes.base.prog_data = NULL;
706 }
707 }
708
709 static inline void
710 brw_upload_programs(struct brw_context *brw,
711 enum brw_pipeline pipeline)
712 {
713 if (pipeline == BRW_RENDER_PIPELINE) {
714 brw_upload_vs_prog(brw);
715 brw_upload_tess_programs(brw);
716
717 if (brw->gen < 6)
718 brw_upload_ff_gs_prog(brw);
719 else
720 brw_upload_gs_prog(brw);
721
722 /* Update the VUE map for data exiting the GS stage of the pipeline.
723 * This comes from the last enabled shader stage.
724 */
725 GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
726 bool old_separate = brw->vue_map_geom_out.separate;
727 if (brw->geometry_program)
728 brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
729 else if (brw->tess_eval_program)
730 brw->vue_map_geom_out = brw->tes.prog_data->base.vue_map;
731 else
732 brw->vue_map_geom_out = brw->vs.prog_data->base.vue_map;
733
734 /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
735 if (old_slots != brw->vue_map_geom_out.slots_valid ||
736 old_separate != brw->vue_map_geom_out.separate)
737 brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;
738
739 if (brw->gen < 6) {
740 brw_setup_vue_interpolation(brw);
741 brw_upload_clip_prog(brw);
742 brw_upload_sf_prog(brw);
743 }
744
745 brw_upload_wm_prog(brw);
746 } else if (pipeline == BRW_COMPUTE_PIPELINE) {
747 brw_upload_cs_prog(brw);
748 }
749 }
750
751 static inline void
752 merge_ctx_state(struct brw_context *brw,
753 struct brw_state_flags *state)
754 {
755 state->mesa |= brw->NewGLState;
756 state->brw |= brw->ctx.NewDriverState;
757 }
758
759 static inline void
760 check_and_emit_atom(struct brw_context *brw,
761 struct brw_state_flags *state,
762 const struct brw_tracked_state *atom)
763 {
764 if (check_state(state, &atom->dirty)) {
765 atom->emit(brw);
766 merge_ctx_state(brw, state);
767 }
768 }
769
770 static inline void
771 brw_upload_pipeline_state(struct brw_context *brw,
772 enum brw_pipeline pipeline)
773 {
774 struct gl_context *ctx = &brw->ctx;
775 int i;
776 static int dirty_count = 0;
777 struct brw_state_flags state = brw->state.pipelines[pipeline];
778 unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);
779
780 brw_select_pipeline(brw, pipeline);
781
782 if (0) {
783 /* Always re-emit all state. */
784 brw->NewGLState = ~0;
785 ctx->NewDriverState = ~0ull;
786 }
787
788 if (pipeline == BRW_RENDER_PIPELINE) {
789 if (brw->fragment_program != ctx->FragmentProgram._Current) {
790 brw->fragment_program = ctx->FragmentProgram._Current;
791 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
792 }
793
794 if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
795 brw->tess_eval_program = ctx->TessEvalProgram._Current;
796 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
797 }
798
799 if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
800 brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
801 brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
802 }
803
804 if (brw->geometry_program != ctx->GeometryProgram._Current) {
805 brw->geometry_program = ctx->GeometryProgram._Current;
806 brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
807 }
808
809 if (brw->vertex_program != ctx->VertexProgram._Current) {
810 brw->vertex_program = ctx->VertexProgram._Current;
811 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
812 }
813 }
814
815 if (brw->compute_program != ctx->ComputeProgram._Current) {
816 brw->compute_program = ctx->ComputeProgram._Current;
817 brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
818 }
819
820 if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
821 brw->meta_in_progress = _mesa_meta_in_progress(ctx);
822 brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
823 }
824
825 if (brw->num_samples != fb_samples) {
826 brw->num_samples = fb_samples;
827 brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
828 }
829
830 /* Exit early if no state is flagged as dirty */
831 merge_ctx_state(brw, &state);
832 if ((state.mesa | state.brw) == 0)
833 return;
834
835 /* Emit Sandybridge workaround flushes on every primitive, for safety. */
836 if (brw->gen == 6)
837 brw_emit_post_sync_nonzero_flush(brw);
838
839 brw_upload_programs(brw, pipeline);
840 merge_ctx_state(brw, &state);
841
842 brw_upload_state_base_address(brw);
843
844 const struct brw_tracked_state *atoms =
845 brw_get_pipeline_atoms(brw, pipeline);
846 const int num_atoms = brw->num_atoms[pipeline];
847
848 if (unlikely(INTEL_DEBUG)) {
849 /* Debug version which enforces various sanity checks on the
850 * state flags which are generated and checked to help ensure
851 * state atoms are ordered correctly in the list.
852 */
853 struct brw_state_flags examined, prev;
854 memset(&examined, 0, sizeof(examined));
855 prev = state;
856
857 for (i = 0; i < num_atoms; i++) {
858 const struct brw_tracked_state *atom = &atoms[i];
859 struct brw_state_flags generated;
860
861 check_and_emit_atom(brw, &state, atom);
862
863 accumulate_state(&examined, &atom->dirty);
864
865 /* generated = (prev ^ state)
866 * if (examined & generated)
867 * fail;
868 */
869 xor_states(&generated, &prev, &state);
870 assert(!check_state(&examined, &generated));
871 prev = state;
872 }
873 }
874 else {
875 for (i = 0; i < num_atoms; i++) {
876 const struct brw_tracked_state *atom = &atoms[i];
877
878 check_and_emit_atom(brw, &state, atom);
879 }
880 }
881
882 if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
883 STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);
884
885 brw_update_dirty_count(mesa_bits, state.mesa);
886 brw_update_dirty_count(brw_bits, state.brw);
887 if (dirty_count++ % 1000 == 0) {
888 brw_print_dirty_count(mesa_bits);
889 brw_print_dirty_count(brw_bits);
890 fprintf(stderr, "\n");
891 }
892 }
893 }
894
895 /***********************************************************************
896 * Emit all state:
897 */
898 void brw_upload_render_state(struct brw_context *brw)
899 {
900 brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
901 }
902
903 static inline void
904 brw_pipeline_state_finished(struct brw_context *brw,
905 enum brw_pipeline pipeline)
906 {
907 /* Save all dirty state into the other pipelines */
908 for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
909 if (i != pipeline) {
910 brw->state.pipelines[i].mesa |= brw->NewGLState;
911 brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
912 } else {
913 memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
914 }
915 }
916
917 brw->NewGLState = 0;
918 brw->ctx.NewDriverState = 0ull;
919 }
920
921 /**
922 * Clear dirty bits to account for the fact that the state emitted by
923 * brw_upload_render_state() has been committed to the hardware. This is a
924 * separate call from brw_upload_render_state() because it's possible that
925 * after the call to brw_upload_render_state(), we will discover that we've
926 * run out of aperture space, and need to rewind the batch buffer to the state
927 * it had before the brw_upload_render_state() call.
928 */
929 void
930 brw_render_state_finished(struct brw_context *brw)
931 {
932 brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
933 }
934
935 void
936 brw_upload_compute_state(struct brw_context *brw)
937 {
938 brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
939 }
940
941 void
942 brw_compute_state_finished(struct brw_context *brw)
943 {
944 brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
945 }