/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/

/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

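/**
 * Switch between mid-object and mid-buffer preemption (Gen9+).
 *
 * The current mode is cached in brw->object_preemption so that redundant
 * register writes, and the end-of-pipe sync they require, are skipped.
 */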
void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->gen >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;

   /* Program the new replay mode. */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GEN9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

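/**
 * On Gen11 parts where the two pixel pipes have unequal numbers of
 * subslices, upload a 16x16 slice hashing table that steers two thirds of
 * the work to the bigger pixel pipe, then enable it via 3DSTATE_3D_MODE.
 */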
static void
brw_upload_gen11_slice_hashing_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GEN11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when pixel pipe 0 has fewer subslices than
    * pixel pipe 1. When pixel pipe 0 has more subslices, a similar table
    * with the 0's and 1's inverted is used.
    */
   for (int i = 0; i < GEN11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From the gen10/gen11 workaround table in the h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we
    * need to update all the others.
    *
    * Since this is the first time we emit this instruction, we are only
    * setting the SLICE_HASHING_TABLE_ENABLE flag, and leaving everything
    * else at its default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}

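/**
 * Emit the GPU state that only needs to be programmed once per context
 * rather than re-emitted on every draw: invariant state, workaround
 * registers, and the initial preemption and slice hashing setup.
 */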
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms. This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" in the SAMPLER
       * MODE register defaults to 0, which means headerless sampler
       * messages are not allowed for pre-emptable contexts. Set bit 5 to 1
       * to allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be set
       * in the L3CNTLREG register. The default setting of the bit is not
       * the desirable behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);
   }

   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
                              GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->gen == 11)
      brw_upload_gen11_slice_hashing_state(brw);
}

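/* Map a pipeline enum to the corresponding list of state atoms. */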
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const. We want
    * it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

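/**
 * One-time state initialization for a new context: select the
 * per-generation atom lists, program the initial GPU state, and flag all
 * state dirty so the first draw uploads everything.
 */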
void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->gen >= 11)
      gen11_init_atoms(brw);
   else if (devinfo->gen >= 10)
      gen10_init_atoms(brw);
   else if (devinfo->gen >= 9)
      gen9_init_atoms(brw);
   else if (devinfo->gen >= 8)
      gen8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gen75_init_atoms(brw);
   else if (devinfo->gen >= 7)
      gen7_init_atoms(brw);
   else if (devinfo->gen >= 6)
      gen6_init_atoms(brw);
   else if (devinfo->gen >= 5)
      gen5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect on the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization =
      BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

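/* Return true if the two dirty-flag sets intersect. */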
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

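/* Accumulate b's dirty flags into a. */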
static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}

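/* Compute the flags that differ between two dirty-flag sets. */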
static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

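/* Bookkeeping for INTEL_DEBUG=state: counts how often each dirty bit
 * triggered a state upload, printed periodically from
 * brw_print_dirty_count().
 */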
struct dirty_bit_map {
   uint64_t bit;
   char *name;
   uint32_t count;
};

#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

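/* Tessellation is keyed off the evaluation shader: without a TES bound
 * there is nothing to tessellate, so both TCS and TES program data are
 * cleared.
 */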
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

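/* Compile (or fetch from the cache) the programs required by the given
 * pipeline and update state derived from them, such as the outgoing VUE
 * map and the viewport count.
 */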
static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->gen < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

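/* Fold the global GL and driver dirty flags into the working set for this
 * upload.
 */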
static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

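/* Emit a single atom if any of the flags it watches are dirty; the emit
 * may itself flag more state dirty, so merge again afterwards.
 */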
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

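/* Upload all dirty state for a pipeline: latch the current programs, bail
 * out early when nothing is dirty, and otherwise walk the pipeline's atom
 * list emitting whatever changed.
 */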
static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug version which enforces various sanity checks on the
       * state flags which are generated and checked to help ensure
       * state atoms are ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

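/* Dirty bits that were just flushed for one pipeline still apply to the
 * others, so save them there before clearing the global sets.
 */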
static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware. This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}