Revert "i965/icl: Add WA_2204188704 to disable pixel shader panic dispatch"
[mesa.git] / src / mesa / drivers / dri / i965 / brw_state_upload.c
/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->gen >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed-function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;

   /* Set the replay mode: mid-object when enabling object-level
    * preemption, mid-buffer when disabling it.
    */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GEN9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}
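
/* Illustrative caller (a sketch, not an exact call site in this file):
 * draw-time code that must not be preempted mid-object could bracket its
 * primitive emission like so:
 *
 *    brw_enable_obj_preemption(brw, false);
 *    ... emit the primitive that cannot be replayed mid-object ...
 *    brw_enable_obj_preemption(brw, true);
 *
 * The early-out above makes redundant toggles cheap, so callers need not
 * track the current mode themselves.
 */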

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" of the
       * SAMPLER_MODE register defaults to 0, which means headerless
       * sampler messages are not allowed for pre-emptable contexts.
       * Set the bit to 1 to allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be
       * set in the L3CNTLREG register.  The default setting of this bit
       * does not give the desired behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);

      /* WaEnableStateCacheRedirectToCS:icl */
      brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
                              GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
                              REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
   }

   if (devinfo->gen == 10 || devinfo->gen == 11) {
      /* From the gen10 workaround table in the h/w specs:
       *
       *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of
       *     DW1 a value of 0xFFFF"
       *
       * This means that we end up setting the entire 3D_MODE state.  Bits
       * in this register control things such as slice hashing, and we want
       * the default values of zero at the moment.
       */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
      OUT_BATCH(0xFFFF << 16);
      ADVANCE_BATCH();
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);

      if (gen_device_info_is_9lp(devinfo)) {
         brw_load_register_imm32(brw, GEN7_GT_MODE,
                                 GEN9_SUBSLICE_HASHING_MASK_BITS |
                                 GEN9_SUBSLICE_HASHING_16x16);
      }
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);
}
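
/* A note on the REG_MASK() idiom used above: many of these workaround
 * ("chicken") registers are write-masked, i.e. bits 31:16 act as a
 * write-enable mask for bits 15:0, so a single MI_LOAD_REGISTER_IMM can
 * update one bit without clobbering the rest of the register.  A sketch
 * of the pattern (SOME_REG and SOME_BIT are placeholders, not real
 * definitions):
 *
 *    brw_load_register_imm32(brw, SOME_REG,
 *                            REG_MASK(SOME_BIT) | SOME_BIT);
 */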

static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We
    * want it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}
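
/* Illustrative use (a sketch; the real atom lists live in the per-gen
 * state files): a genN_init_atoms() implementation typically builds a
 * static list of tracked-state atoms in emission order and installs it
 * with brw_copy_pipeline_atoms().  The atom names below are placeholders:
 *
 *    static const struct brw_tracked_state *render_atoms[] = {
 *       &some_atom, ... more atoms in dependency order ...
 *    };
 *
 *    brw_copy_pipeline_atoms(brw, BRW_RENDER_PIPELINE,
 *                            render_atoms, ARRAY_SIZE(render_atoms));
 */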

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->gen >= 11)
      gen11_init_atoms(brw);
   else if (devinfo->gen >= 10)
      gen10_init_atoms(brw);
   else if (devinfo->gen >= 9)
      gen9_init_atoms(brw);
   else if (devinfo->gen >= 8)
      gen8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gen75_init_atoms(brw);
   else if (devinfo->gen >= 7)
      gen7_init_atoms(brw);
   else if (devinfo->gen >= 6)
      gen6_init_atoms(brw);
   else if (devinfo->gen >= 5)
      gen5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}
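
/* Worked example (hypothetical bit values) of how these helpers combine
 * in brw_upload_pipeline_state() below: if state.brw contains
 * BRW_NEW_BATCH and an atom's .dirty.brw also lists BRW_NEW_BATCH,
 * check_state() returns true and the atom is emitted.  xor_states() then
 * isolates exactly the bits that the emit() call newly flagged, and the
 * debug loop asserts that none of them were already examined, i.e. that
 * no atom dirties state consumed by an earlier atom in the list.
 */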

struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}
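
/* With INTEL_DEBUG=state, the loop in brw_upload_pipeline_state() prints
 * a histogram like the following every 1000 uploads (bit positions and
 * counts here are illustrative, not real output):
 *
 *    0x0000000000000010:         4273 (BRW_NEW_URB_FENCE)
 *    0x0000000002000000:          812 (BRW_NEW_BATCH)
 */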

static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}
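
/* Note: when a TES is bound without an application-supplied TCS,
 * brw_upload_tcs_prog() is still called and is expected to supply a
 * passthrough TCS (see the TCS upload code); only when tessellation is
 * entirely absent are both prog_data pointers cleared.
 */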

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->gen < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the state
       * flags which are generated and checked, to help ensure state atoms
       * are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is
 * a separate call from brw_upload_render_state() because it's possible
 * that after the call to brw_upload_render_state(), we will discover that
 * we've run out of aperture space, and need to rewind the batch buffer to
 * the state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}
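
/* Typical call sequence from the draw path (a sketch of the contract
 * described above, not a verbatim caller):
 *
 *    brw_upload_render_state(brw);
 *    ... emit the 3DPRIMITIVE ...
 *    if (the batch ran out of aperture space) {
 *       rewind the batch and retry;
 *    } else {
 *       brw_render_state_finished(brw);
 *    }
 */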

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}