i965: implement WaEnableStateCacheRedirectToCS
[mesa.git] src/mesa/drivers/dri/i965/brw_state_upload.c
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */



#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "main/framebuffer.h"

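/* Toggle mid-object preemption on Gen9+ via the CS_CHICKEN1 register.
 * The current setting is cached in brw->object_preemption so the LRI and
 * the end-of-pipe sync it requires are only emitted on an actual change.
 */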
void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->gen >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed-function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;

   /* Select mid-object or mid-buffer preemption replay mode. */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GEN9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms.  This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the
       * SAMPLER MODE register defaults to 0, which means headerless
       * sampler messages are not allowed for pre-emptable contexts.
       * Set bit 5 to 1 to allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be
       * set in the L3CNTLREG register; the bit's default setting is not
       * the desired behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);

      /* WA_2204188704: Pixel Shader Panic dispatch must be disabled. */
      brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN3,
                              PS_THREAD_PANIC_DISPATCH_MASK |
                              PS_THREAD_PANIC_DISPATCH);

      /* WaEnableStateCacheRedirectToCS:icl */
      brw_load_register_imm32(brw, SLICE_COMMON_ECO_CHICKEN1,
                              GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE |
                              REG_MASK(GEN11_STATE_CACHE_REDIRECT_TO_CS_SECTION_ENABLE));
   }

   if (devinfo->gen == 10 || devinfo->gen == 11) {
      /* From gen10 workaround table in h/w specs:
       *
       *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
       *     a value of 0xFFFF"
       *
       * This means that we end up setting the entire 3D_MODE state.  Bits
       * in this register control things such as slice hashing and we want
       * the default values of zero at the moment.
       */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
      OUT_BATCH(0xFFFF << 16);
      ADVANCE_BATCH();
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);

      if (gen_device_info_is_9lp(devinfo)) {
         brw_load_register_imm32(brw, GEN7_GT_MODE,
                                 GEN9_SUBSLICE_HASHING_MASK_BITS |
                                 GEN9_SUBSLICE_HASHING_16x16);
      }
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

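      /* Initialize 3DSTATE_WM_HZ_OP to all zeros so this state starts out
       * well-defined (no HiZ operation requested).
       */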
      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

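      /* Initialize 3DSTATE_WM_CHROMAKEY to zero, leaving chroma keying
       * disabled.
       */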
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);
}

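/* Return the tracked-state atom list for the given pipeline.  Each
 * pipeline (render, compute) keeps its own ordered list of atoms.
 */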
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

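/* One-time state setup: pick the per-generation atom lists, upload the
 * initial GPU state, and flag all state dirty so the first draw emits
 * everything.
 */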
void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->gen >= 11)
      gen11_init_atoms(brw);
   else if (devinfo->gen >= 10)
      gen10_init_atoms(brw);
   else if (devinfo->gen >= 9)
      gen9_init_atoms(brw);
   else if (devinfo->gen >= 8)
      gen8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gen75_init_atoms(brw);
   else if (devinfo->gen >= 7)
      gen7_init_atoms(brw);
   else if (devinfo->gen >= 6)
      gen6_init_atoms(brw);
   else if (devinfo->gen >= 5)
      gen5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

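/* Return true if any dirty bit set in @a is also set in @b. */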
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

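/* Accumulate the dirty bits of @b into @a. */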
static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


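/* Compute the set of dirty bits that differ between @a and @b. */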
static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

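/* Bookkeeping for INTEL_DEBUG=state: counts how often each dirty bit
 * triggers, so frequently flagged state can be identified.
 */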
struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_ARRAY),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

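/* Tessellation is keyed off the TES: if a TES is bound, upload both the
 * TCS and TES programs (the TCS may be an implicit passthrough);
 * otherwise clear both stages' program data.
 */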
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->gen < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

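/* Emit @atom if its dirty bits intersect @state.  Emitting an atom may
 * flag additional state, so fold any newly flagged bits back into @state
 * for the atoms that follow.
 */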
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

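/* The core state upload loop: resolve the active programs, accumulate
 * dirty flags, then walk the pipeline's atom list emitting every atom
 * whose dirty bits intersect the accumulated state.
 */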
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   }
   else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}