i965: Make a BRW_NEW_FAST_CLEAR_COLOR dirty bit.
src/mesa/drivers/dri/i965/brw_state_upload.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (brw->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
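      /* CACHE_MODE_1 (like the other MI_LOAD_REGISTER_IMM targets below) is
       * a masked register: the high 16 bits of the payload select which of
       * the low 16 bits are actually written, which is why each enable bit
       * is paired with its REG_MASK() counterpart.
       */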
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(GEN7_CACHE_MODE_1);
      OUT_BATCH(REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
      ADVANCE_BATCH();

      if (brw->is_broxton) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(GEN7_GT_MODE);
         OUT_BATCH(GEN9_SUBSLICE_HASHING_MASK_BITS |
                   GEN9_SUBSLICE_HASHING_16x16);
         ADVANCE_BATCH();
      }
   }

   if (brw->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * On Gen6-7.5, we use an execbuf parameter to do this for us.
    * However, the kernel ignores that when execlists are in use.
    * Fortunately, we can just write the registers from userspace
    * on Gen8+, and they're context saved/restored.
    */
   if (brw->gen >= 9) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(CS_DEBUG_MODE2);
      OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
      ADVANCE_BATCH();
   } else if (brw->gen == 8) {
      BEGIN_BATCH(3);
      OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
      OUT_BATCH(INSTPM);
      OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
      ADVANCE_BATCH();
   }
}

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}
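
/* A minimal sketch of how the per-generation init functions called from
 * brw_init_state() below are expected to use brw_copy_pipeline_atoms(),
 * assuming a static table of tracked-state atoms in emission order (the
 * real tables live in the genN atom setup code):
 *
 *    static const struct brw_tracked_state *render_atoms[] = {
 *       &brw_vs_prog, ...
 *    };
 *    brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
 *                            render_atoms, ARRAY_SIZE(render_atoms));
 */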

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (brw->gen >= 10)
      gen10_init_atoms(brw);
   else if (brw->gen >= 9)
      gen9_init_atoms(brw);
   else if (brw->gen >= 8)
      gen8_init_atoms(brw);
   else if (brw->is_haswell)
      gen75_init_atoms(brw);
   else if (brw->gen >= 7)
      gen7_init_atoms(brw);
   else if (brw->gen >= 6)
      gen6_init_atoms(brw);
   else if (brw->gen >= 5)
      gen5_init_atoms(brw);
   else if (brw->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));
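
   /* Map core Mesa's driver-declared dirty flags onto our BRW_NEW_* bits;
    * when the corresponding GL state changes, core Mesa ORs these values
    * into ctx->NewDriverState for us to consume in merge_ctx_state().
    */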
   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization =
      BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */
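
/* Each state atom declares which core Mesa (_NEW_*) and driver (BRW_NEW_*)
 * flags it depends on; an atom is re-emitted only when its dirty masks
 * intersect the accumulated dirty state.  The helpers below implement the
 * set operations on brw_state_flags used for that bookkeeping.
 */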

static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}
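
/* DEFINE_BIT(x) expands to {x, "x", 0} via the preprocessor's stringizing
 * operator.  Each table below is terminated by a {0, 0, 0} sentinel, which
 * the loops in brw_update_dirty_count() and brw_print_dirty_count() rely on.
 */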

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_ATOMIC_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_FAST_CLEAR_COLOR),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}
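
/* The tessellation stages are keyed off the evaluation shader: when no TES
 * is bound, neither stage runs, so both prog_data pointers are cleared
 * instead of uploading programs.
 */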
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->tess_eval_program) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}
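
/* Compile and upload the programs for each enabled stage of the given
 * pipeline, then recompute derived state (the geometry-out VUE map and the
 * viewport count) that depends on which stages are enabled.
 */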
static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->gen < 6)
         brw_upload_ff_gs_prog(brw);
      else
         brw_upload_gs_prog(brw);

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->geometry_program)
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->tess_eval_program)
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (brw->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}
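
/* Emit a single atom if its dirty bits intersect the accumulated state.
 * The flags are re-merged afterwards because atom->emit() itself may flag
 * additional driver state.
 */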
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
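   /* Note: _mesa_geometric_samples() reports 0 for a non-multisampled
    * buffer, so clamp to 1 to keep fb_samples a valid sample count.
    */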
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (0) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->fragment_program != ctx->FragmentProgram._Current) {
         brw->fragment_program = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->tess_eval_program != ctx->TessEvalProgram._Current) {
         brw->tess_eval_program = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->tess_ctrl_program != ctx->TessCtrlProgram._Current) {
         brw->tess_ctrl_program = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->geometry_program != ctx->GeometryProgram._Current) {
         brw->geometry_program = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->vertex_program != ctx->VertexProgram._Current) {
         brw->vertex_program = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->compute_program != ctx->ComputeProgram._Current) {
      brw->compute_program = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (brw->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *     fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}
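
/* Mark the current pipeline's state as emitted.  The dirty flags are folded
 * into the other pipelines' pending sets first, since those pipelines still
 * need to process them the next time they are uploaded.
 */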
static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}