i965: Program 3DSTATE_AA_LINE_PARAMETERS in upload_invariant_state
[mesa.git] src/mesa/drivers/dri/i965/brw_state_upload.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
 /*
  * Authors:
  *   Keith Whitwell <keithw@vmware.com>
  */



#include "brw_context.h"
#include "brw_state.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

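/* The tables below list the tracked-state atoms for each hardware
 * generation in the order they are checked and emitted.  Ordering matters:
 * several atoms must run before others, as noted in the inline comments.
 */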
static const struct brw_tracked_state *gen4_atoms[] =
{
   /* Once all the programs are done, we know how large urb entry
    * sizes need to be and can decide if we need to change the urb
    * layout.
    */
   &brw_curbe_offsets,
   &brw_recalculate_urb_fence,

   &brw_cc_vp,
   &brw_cc_unit,

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_wm_pull_constants,
   &brw_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,

   /* These set up state for brw_psp_urb_cbs */
   &brw_wm_unit,
   &brw_sf_vp,
   &brw_sf_unit,
   &brw_vs_unit, /* always required, enabled or not */
   &brw_clip_unit,
   &brw_gs_unit,

   /* Command packets:
    */
   &brw_invariant_state,

   &brw_binding_table_pointers,
   &brw_blend_constant_color,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_psp_urb_cbs,

   &brw_drawing_rect,
   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &brw_constant_buffer
};

static const struct brw_tracked_state *gen6_atoms[] =
{
   &gen6_clip_vp,
   &gen6_sf_vp,

   /* Command packets: */

   &brw_cc_vp,
   &gen6_viewport_state, /* must do after *_vp stages */

   &gen6_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_state */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &gen6_sol_surface,
   &brw_vs_binding_table,
   &gen6_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_gs_samplers,
   &gen6_sampler_state,
   &gen6_multisample_state,

   &gen6_vs_state,
   &gen6_gs_state,
   &gen6_clip_state,
   &gen6_sf_state,
   &gen6_wm_state,

   &gen6_scissor_state,

   &gen6_binding_table_pointers,

   &brw_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,
};

static const struct brw_tracked_state *gen7_render_atoms[] =
{
   /* Command packets: */

   &brw_cc_vp,
   &gen7_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen6_blend_state, /* must do before cc unit */
   &gen6_color_calc_state, /* must do before cc unit */
   &gen6_depth_stencil_state, /* must do before cc unit */

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Haswell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen6_multisample_state,

   &gen7_vs_state,
   &gen7_hs_state,
   &gen7_te_state,
   &gen7_ds_state,
   &gen7_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen7_sbe_state,
   &gen7_sf_state,
   &gen7_wm_state,
   &gen7_ps_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &brw_indices, /* must come before brw_vertices */
   &brw_index_buffer,
   &brw_vertices,

   &haswell_cut_index,
};

static const struct brw_tracked_state *gen7_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};

static const struct brw_tracked_state *gen8_render_atoms[] =
{
   &brw_cc_vp,
   &gen8_sf_clip_viewport,

   &gen7_l3_state,
   &gen7_push_constant_space,
   &gen7_urb,
   &gen8_blend_state,
   &gen6_color_calc_state,

   &gen7_hw_binding_tables, /* Enable hw-generated binding tables for Broadwell */

   &brw_vs_image_surfaces, /* Before vs push/pull constants and binding table */
   &brw_tcs_image_surfaces, /* Before tcs push/pull constants and binding table */
   &brw_tes_image_surfaces, /* Before tes push/pull constants and binding table */
   &brw_gs_image_surfaces, /* Before gs push/pull constants and binding table */
   &brw_wm_image_surfaces, /* Before wm push/pull constants and binding table */

   &gen6_vs_push_constants, /* Before vs_state */
   &gen7_tcs_push_constants,
   &gen7_tes_push_constants,
   &gen6_gs_push_constants, /* Before gs_state */
   &gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */

   /* Surface state setup.  Must come before the VS/WM unit.  The binding
    * table upload must be last.
    */
   &brw_vs_pull_constants,
   &brw_vs_ubo_surfaces,
   &brw_vs_abo_surfaces,
   &brw_tcs_pull_constants,
   &brw_tcs_ubo_surfaces,
   &brw_tcs_abo_surfaces,
   &brw_tes_pull_constants,
   &brw_tes_ubo_surfaces,
   &brw_tes_abo_surfaces,
   &brw_gs_pull_constants,
   &brw_gs_ubo_surfaces,
   &brw_gs_abo_surfaces,
   &brw_wm_pull_constants,
   &brw_wm_ubo_surfaces,
   &brw_wm_abo_surfaces,
   &gen6_renderbuffer_surfaces,
   &brw_renderbuffer_read_surfaces,
   &brw_texture_surfaces,
   &brw_vs_binding_table,
   &brw_tcs_binding_table,
   &brw_tes_binding_table,
   &brw_gs_binding_table,
   &brw_wm_binding_table,

   &brw_fs_samplers,
   &brw_vs_samplers,
   &brw_tcs_samplers,
   &brw_tes_samplers,
   &brw_gs_samplers,
   &gen8_multisample_state,

   &gen8_disable_stages,
   &gen8_vs_state,
   &gen8_hs_state,
   &gen7_te_state,
   &gen8_ds_state,
   &gen8_gs_state,
   &gen7_sol_state,
   &gen6_clip_state,
   &gen8_raster_state,
   &gen8_sbe_state,
   &gen8_sf_state,
   &gen8_ps_blend,
   &gen8_ps_extra,
   &gen8_ps_state,
   &gen8_wm_depth_stencil,
   &gen8_wm_state,

   &gen6_scissor_state,

   &gen7_depthbuffer,

   &brw_polygon_stipple,
   &brw_polygon_stipple_offset,

   &brw_line_stipple,

   &brw_drawing_rect,

   &gen8_vf_topology,

   &brw_indices,
   &gen8_index_buffer,
   &gen8_vertices,

   &haswell_cut_index,
   &gen8_pma_fix,
};

static const struct brw_tracked_state *gen8_compute_atoms[] =
{
   &gen7_l3_state,
   &brw_cs_image_surfaces,
   &gen7_cs_push_constants,
   &brw_cs_pull_constants,
   &brw_cs_ubo_surfaces,
   &brw_cs_abo_surfaces,
   &brw_cs_texture_surfaces,
   &brw_cs_work_groups_surface,
   &brw_cs_samplers,
   &brw_cs_state,
};

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

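   /* Sandybridge requires workaround flushes before many state packets
    * (see the similar call in brw_upload_pipeline_state()).
    */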
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

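   /* Upload state that never changes for the lifetime of the context
    * (pipeline-invariant packets such as 3DSTATE_AA_LINE_PARAMETERS).
    */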
   brw_upload_invariant_state(brw);

   /* Recommended optimization for Victim Cache eviction in pixel backend. */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);
   }
}

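/* Return the atom list installed by brw_init_state() for the given
 * pipeline.
 */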
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

static void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

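   /* The per-pipeline atom lists get copied into fixed-size arrays inside
    * brw_context, so make sure the largest per-generation list fits.
    */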
   STATIC_ASSERT(ARRAY_SIZE(gen4_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen6_atoms) <= ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_render_atoms) <=
                 ARRAY_SIZE(brw->render_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen7_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));
   STATIC_ASSERT(ARRAY_SIZE(gen8_compute_atoms) <=
                 ARRAY_SIZE(brw->compute_atoms));

   brw_init_caches(brw);

   if (brw->gen >= 8) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen8_render_atoms,
                              ARRAY_SIZE(gen8_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen8_compute_atoms,
                              ARRAY_SIZE(gen8_compute_atoms));
   } else if (brw->gen == 7) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen7_render_atoms,
                              ARRAY_SIZE(gen7_render_atoms));
      brw_copy_pipeline_atoms(brw, BRW_COMPUTE_PIPELINE,
                              gen7_compute_atoms,
                              ARRAY_SIZE(gen7_compute_atoms));
   } else if (brw->gen == 6) {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen6_atoms, ARRAY_SIZE(gen6_atoms));
   } else {
      brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
                              gen4_atoms, ARRAY_SIZE(gen4_atoms));
   }

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all possible
    * dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

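/* Return true if any dirty bit requested by 'b' is currently set in 'a'. */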
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

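/* Bookkeeping for INTEL_DEBUG=state: track how often each dirty bit
 * triggered state emission so the statistics can be dumped periodically.
 */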
struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_BUFFER_OBJECT),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_CURBE_OFFSETS),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

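/* The tessellation stages are only active when a TES is bound; without
 * one, clear the TCS/TES prog_data so later atoms treat those stages as
 * disabled.
 */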
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->tess_eval_program) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

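/* Compile (or look up in the program cache) the shaders needed for the
 * given pipeline before walking the state atoms, since many atoms key off
 * the resulting prog_data.
 */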
static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->gen < 6)
         brw_upload_ff_gs_prog(brw);
      else
         brw_upload_gs_prog(brw);

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->geometry_program)
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->tess_eval_program)
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (brw->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
   }
}

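/* Fold any GL or driver dirty flags raised since the last merge into the
 * locally accumulated state for this pipeline.
 */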
static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

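/* Emit the atom if any of its dirty bits are currently set, then re-merge
 * the context flags since emitting may have flagged additional state.
 */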
static inline void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   unsigned int fb_samples = _mesa_geometric_samples(ctx->DrawBuffer);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the state
 * it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}