/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "brw_context.h"
#include "brw_defines.h"
#include "brw_state.h"
#include "brw_program.h"
#include "drivers/common/meta.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
#include "brw_vs.h"
#include "brw_ff_gs.h"
#include "brw_gs.h"
#include "brw_wm.h"
#include "brw_cs.h"
#include "genxml/genX_bits.h"
#include "main/framebuffer.h"

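/* Select the preemption granularity on Gen9+: mid-object replay when
 * object-level preemption is enabled, mid-buffer replay otherwise.
 */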
void
brw_enable_obj_preemption(struct brw_context *brw, bool enable)
{
   ASSERTED const struct gen_device_info *devinfo = &brw->screen->devinfo;
   assert(devinfo->gen >= 9);

   if (enable == brw->object_preemption)
      return;

   /* A fixed function pipe flush is required before modifying this field */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   bool replay_mode = enable ?
      GEN9_REPLAY_MODE_MIDOBJECT : GEN9_REPLAY_MODE_MIDBUFFER;

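   /* CS_CHICKEN1 is a masked register: the upper 16 bits are per-bit write
    * enables, so OR-ing in GEN9_REPLAY_MODE_MASK below commits the write to
    * just the replay-mode field.
    */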
   /* Enable object-level preemption. */
   brw_load_register_imm32(brw, CS_CHICKEN1,
                           replay_mode | GEN9_REPLAY_MODE_MASK);

   brw->object_preemption = enable;
}

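/* Gen11 has two pixel pipes which may expose different subslice counts.
 * When they are asymmetric, upload a slice hashing table that steers
 * proportionally more work to the pipe with more subslices.
 */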
static void
brw_upload_gen11_slice_hashing_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   int subslices_delta =
      devinfo->ppipe_subslices[0] - devinfo->ppipe_subslices[1];
   if (subslices_delta == 0)
      return;

   unsigned size = GEN11_SLICE_HASH_TABLE_length * 4;
   uint32_t hash_address;

   uint32_t *map = brw_state_batch(brw, size, 64, &hash_address);

   unsigned idx = 0;

   unsigned sl_small = 0;
   unsigned sl_big = 1;
   if (subslices_delta > 0) {
      sl_small = 1;
      sl_big = 0;
   }

   /**
    * Create a 16x16 slice hashing table like the following one:
    *
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    * [ 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1 ]
    * [ 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0 ]
    * [ 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1 ]
    *
    * The table above is used when pixel pipe 0 has fewer subslices than
    * pixel pipe 1. When pixel pipe 0 has more subslices, a similar table
    * with the 0's and 1's inverted is used.
    */
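   /* Each 32-bit entry packs eight 4-bit subslice indices; "idx++ % 3"
    * yields the repeating small/big/big pattern shown above, so the pipe
    * with more subslices receives two thirds of the entries.
    */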
   for (int i = 0; i < GEN11_SLICE_HASH_TABLE_length; i++) {
      uint32_t dw = 0;

      for (int j = 0; j < 8; j++) {
         unsigned slice = idx++ % 3 ? sl_big : sl_small;
         dw |= slice << (j * 4);
      }
      map[i] = dw;
   }

   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_SLICE_TABLE_STATE_POINTERS << 16 | (2 - 2));
   OUT_RELOC(brw->batch.state.bo, 0, hash_address | 1);
   ADVANCE_BATCH();

   /* From the gen10/gen11 workaround table in the h/w specs:
    *
    *    "On 3DSTATE_3D_MODE, driver must always program bits 31:16 of DW1
    *     a value of 0xFFFF"
    *
    * This means that whenever we update a field with this instruction, we
    * need to update all the others.
    *
    * Since this is the first time we emit this instruction, we are only
    * setting the SLICE_HASHING_TABLE_ENABLE flag and leaving everything
    * else at its default state (0).
    */
   BEGIN_BATCH(2);
   OUT_BATCH(_3DSTATE_3D_MODE << 16 | (2 - 2));
   OUT_BATCH(0xffff | SLICE_HASHING_TABLE_ENABLE);
   ADVANCE_BATCH();
}

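/* Upload GPU state that is set once at context creation and never has to
 * change per draw, provided the kernel gives us a hardware context to
 * keep it in.
 */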
static void
brw_upload_initial_gpu_state(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const struct brw_compiler *compiler = brw->screen->compiler;

   /* On platforms with hardware contexts, we can set our initial GPU state
    * right away rather than doing it via state atoms. This saves a small
    * amount of overhead on every draw call.
    */
   if (!brw->hw_ctx)
      return;

   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_invariant_state(brw);

   if (devinfo->gen == 11) {
      /* Bit 5 "Headerless Message for Pre-emptable Contexts" of the
       * SAMPLER MODE register defaults to 0, which disallows headerless
       * sampler messages for pre-emptable contexts. Set bit 5 to 1 to
       * allow them.
       */
      brw_load_register_imm32(brw, GEN11_SAMPLER_MODE,
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS_MASK |
                              HEADERLESS_MESSAGE_FOR_PREEMPTABLE_CONTEXTS);

      /* Bit 1 "Enabled Texel Offset Precision Fix" must be set in the
       * HALF_SLICE_CHICKEN7 register.
       */
      brw_load_register_imm32(brw, HALF_SLICE_CHICKEN7,
                              TEXEL_OFFSET_FIX_MASK |
                              TEXEL_OFFSET_FIX_ENABLE);

      /* WA_1406697149: Bit 9 "Error Detection Behavior Control" must be
       * set in the L3CNTLREG register. The default setting of the bit is
       * not the desired behavior.
       */
      brw_load_register_imm32(brw, GEN8_L3CNTLREG,
                              GEN8_L3CNTLREG_EDBC_NO_HANG);

      /* WA_220160979: Enable hardware filtering of semi-pipelined state
       * in the WM.
       */
      brw_load_register_imm32(brw, COMMON_SLICE_CHICKEN4,
                              GEN11_ENABLE_HARDWARE_FILTERING_IN_WM |
                              REG_MASK(GEN11_ENABLE_HARDWARE_FILTERING_IN_WM));
   }

   /* The hardware specification recommends disabling repacking for
    * compatibility with the decompression mechanism in the display
    * controller.
    */
   if (devinfo->disable_ccs_repack) {
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_0,
                              GEN11_DISABLE_REPACKING_FOR_COMPRESSION |
                              REG_MASK(GEN11_DISABLE_REPACKING_FOR_COMPRESSION));
   }

   if (devinfo->gen == 9) {
      /* Recommended optimizations for Victim Cache eviction and floating
       * point blending.
       */
      brw_load_register_imm32(brw, GEN7_CACHE_MODE_1,
                              REG_MASK(GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE) |
                              REG_MASK(GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC) |
                              GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
                              GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
   }

   if (devinfo->gen >= 8) {
      gen8_emit_3dstate_sample_pattern(brw);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_WM_HZ_OP << 16 | (5 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_WM_CHROMAKEY << 16 | (2 - 2));
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }

   /* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
    * 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
    *
    * This is only safe on kernels with context isolation support.
    */
   if (!compiler->constant_buffer_0_is_relative) {
      if (devinfo->gen >= 9) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(CS_DEBUG_MODE2);
         OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      } else if (devinfo->gen == 8) {
         BEGIN_BATCH(3);
         OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
         OUT_BATCH(INSTPM);
         OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
                   INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
         ADVANCE_BATCH();
      }
   }

   brw->object_preemption = false;

   if (devinfo->gen >= 10)
      brw_enable_obj_preemption(brw, true);

   if (devinfo->gen == 11)
      brw_upload_gen11_slice_hashing_state(brw);
}

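/* Return the atom list that drives the given pipeline. */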
static inline const struct brw_tracked_state *
brw_get_pipeline_atoms(struct brw_context *brw,
                       enum brw_pipeline pipeline)
{
   switch (pipeline) {
   case BRW_RENDER_PIPELINE:
      return brw->render_atoms;
   case BRW_COMPUTE_PIPELINE:
      return brw->compute_atoms;
   default:
      STATIC_ASSERT(BRW_NUM_PIPELINES == 2);
      unreachable("Unsupported pipeline");
      return NULL;
   }
}

void
brw_copy_pipeline_atoms(struct brw_context *brw,
                        enum brw_pipeline pipeline,
                        const struct brw_tracked_state **atoms,
                        int num_atoms)
{
   /* This is to work around brw_context::atoms being declared const.  We
    * want it to be const, but it needs to be initialized somehow!
    */
   struct brw_tracked_state *context_atoms =
      (struct brw_tracked_state *) brw_get_pipeline_atoms(brw, pipeline);

   for (int i = 0; i < num_atoms; i++) {
      context_atoms[i] = *atoms[i];
      assert(context_atoms[i].dirty.mesa | context_atoms[i].dirty.brw);
      assert(context_atoms[i].emit);
   }

   brw->num_atoms[pipeline] = num_atoms;
}

void brw_init_state( struct brw_context *brw )
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* Force the first brw_select_pipeline to emit pipeline select */
   brw->last_pipeline = BRW_NUM_PIPELINES;

   brw_init_caches(brw);

   if (devinfo->gen >= 11)
      gen11_init_atoms(brw);
   else if (devinfo->gen >= 10)
      gen10_init_atoms(brw);
   else if (devinfo->gen >= 9)
      gen9_init_atoms(brw);
   else if (devinfo->gen >= 8)
      gen8_init_atoms(brw);
   else if (devinfo->is_haswell)
      gen75_init_atoms(brw);
   else if (devinfo->gen >= 7)
      gen7_init_atoms(brw);
   else if (devinfo->gen >= 6)
      gen6_init_atoms(brw);
   else if (devinfo->gen >= 5)
      gen5_init_atoms(brw);
   else if (devinfo->is_g4x)
      gen45_init_atoms(brw);
   else
      gen4_init_atoms(brw);

   brw_upload_initial_gpu_state(brw);

   brw->NewGLState = ~0;
   brw->ctx.NewDriverState = ~0ull;

   /* ~0 is a nonsensical value which won't match anything we program, so
    * the programming will take effect the first time around.
    */
   brw->pma_stall_bits = ~0;

   /* Make sure that brw->ctx.NewDriverState has enough bits to hold all
    * possible dirty flags.
    */
   STATIC_ASSERT(BRW_NUM_STATE_BITS <= 8 * sizeof(brw->ctx.NewDriverState));

   ctx->DriverFlags.NewTransformFeedback = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewTransformFeedbackProg = BRW_NEW_TRANSFORM_FEEDBACK;
   ctx->DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
   ctx->DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewShaderStorageBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewTextureBuffer = BRW_NEW_TEXTURE_BUFFER;
   ctx->DriverFlags.NewAtomicBuffer = BRW_NEW_UNIFORM_BUFFER;
   ctx->DriverFlags.NewImageUnits = BRW_NEW_IMAGE_UNITS;
   ctx->DriverFlags.NewDefaultTessLevels = BRW_NEW_DEFAULT_TESS_LEVELS;
   ctx->DriverFlags.NewIntelConservativeRasterization = BRW_NEW_CONSERVATIVE_RASTERIZATION;
}


void brw_destroy_state( struct brw_context *brw )
{
   brw_destroy_caches(brw);
}

/***********************************************************************
 */

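/* Return true if the two flag sets have any dirty bits in common. */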
static bool
check_state(const struct brw_state_flags *a, const struct brw_state_flags *b)
{
   return ((a->mesa & b->mesa) | (a->brw & b->brw)) != 0;
}

static void accumulate_state( struct brw_state_flags *a,
                              const struct brw_state_flags *b )
{
   a->mesa |= b->mesa;
   a->brw |= b->brw;
}


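/* Compute the set of dirty bits that differ between a and b, i.e. the
 * bits that changed between two snapshots of the state flags.
 */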
static void xor_states( struct brw_state_flags *result,
                        const struct brw_state_flags *a,
                        const struct brw_state_flags *b )
{
   result->mesa = a->mesa ^ b->mesa;
   result->brw = a->brw ^ b->brw;
}

struct dirty_bit_map {
   uint64_t bit;
   const char *name;
   uint32_t count;
};

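/* Build a dirty_bit_map entry from a flag, using the preprocessor to
 * stringify its name.
 */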
#define DEFINE_BIT(name) {name, #name, 0}

static struct dirty_bit_map mesa_bits[] = {
   DEFINE_BIT(_NEW_MODELVIEW),
   DEFINE_BIT(_NEW_PROJECTION),
   DEFINE_BIT(_NEW_TEXTURE_MATRIX),
   DEFINE_BIT(_NEW_COLOR),
   DEFINE_BIT(_NEW_DEPTH),
   DEFINE_BIT(_NEW_EVAL),
   DEFINE_BIT(_NEW_FOG),
   DEFINE_BIT(_NEW_HINT),
   DEFINE_BIT(_NEW_LIGHT),
   DEFINE_BIT(_NEW_LINE),
   DEFINE_BIT(_NEW_PIXEL),
   DEFINE_BIT(_NEW_POINT),
   DEFINE_BIT(_NEW_POLYGON),
   DEFINE_BIT(_NEW_POLYGONSTIPPLE),
   DEFINE_BIT(_NEW_SCISSOR),
   DEFINE_BIT(_NEW_STENCIL),
   DEFINE_BIT(_NEW_TEXTURE_OBJECT),
   DEFINE_BIT(_NEW_TRANSFORM),
   DEFINE_BIT(_NEW_VIEWPORT),
   DEFINE_BIT(_NEW_TEXTURE_STATE),
   DEFINE_BIT(_NEW_RENDERMODE),
   DEFINE_BIT(_NEW_BUFFERS),
   DEFINE_BIT(_NEW_CURRENT_ATTRIB),
   DEFINE_BIT(_NEW_MULTISAMPLE),
   DEFINE_BIT(_NEW_TRACK_MATRIX),
   DEFINE_BIT(_NEW_PROGRAM),
   DEFINE_BIT(_NEW_PROGRAM_CONSTANTS),
   DEFINE_BIT(_NEW_FRAG_CLAMP),
   /* Avoid sign extension problems. */
   {(unsigned) _NEW_VARYING_VP_INPUTS, "_NEW_VARYING_VP_INPUTS", 0},
   {0, 0, 0}
};

static struct dirty_bit_map brw_bits[] = {
   DEFINE_BIT(BRW_NEW_FS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_BLORP_BLIT_PROG_DATA),
   DEFINE_BIT(BRW_NEW_SF_PROG_DATA),
   DEFINE_BIT(BRW_NEW_VS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_FF_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_GS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TCS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_TES_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CLIP_PROG_DATA),
   DEFINE_BIT(BRW_NEW_CS_PROG_DATA),
   DEFINE_BIT(BRW_NEW_URB_FENCE),
   DEFINE_BIT(BRW_NEW_FRAGMENT_PROGRAM),
   DEFINE_BIT(BRW_NEW_GEOMETRY_PROGRAM),
   DEFINE_BIT(BRW_NEW_TESS_PROGRAMS),
   DEFINE_BIT(BRW_NEW_VERTEX_PROGRAM),
   DEFINE_BIT(BRW_NEW_REDUCED_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PATCH_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_PRIMITIVE),
   DEFINE_BIT(BRW_NEW_CONTEXT),
   DEFINE_BIT(BRW_NEW_PSP),
   DEFINE_BIT(BRW_NEW_SURFACES),
   DEFINE_BIT(BRW_NEW_BINDING_TABLE_POINTERS),
   DEFINE_BIT(BRW_NEW_INDICES),
   DEFINE_BIT(BRW_NEW_VERTICES),
   DEFINE_BIT(BRW_NEW_DEFAULT_TESS_LEVELS),
   DEFINE_BIT(BRW_NEW_BATCH),
   DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
   DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TCS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_TES_CONSTBUF),
   DEFINE_BIT(BRW_NEW_GS_CONSTBUF),
   DEFINE_BIT(BRW_NEW_PROGRAM_CACHE),
   DEFINE_BIT(BRW_NEW_STATE_BASE_ADDRESS),
   DEFINE_BIT(BRW_NEW_VUE_MAP_GEOM_OUT),
   DEFINE_BIT(BRW_NEW_TRANSFORM_FEEDBACK),
   DEFINE_BIT(BRW_NEW_RASTERIZER_DISCARD),
   DEFINE_BIT(BRW_NEW_STATS_WM),
   DEFINE_BIT(BRW_NEW_UNIFORM_BUFFER),
   DEFINE_BIT(BRW_NEW_IMAGE_UNITS),
   DEFINE_BIT(BRW_NEW_META_IN_PROGRESS),
   DEFINE_BIT(BRW_NEW_PUSH_CONSTANT_ALLOCATION),
   DEFINE_BIT(BRW_NEW_NUM_SAMPLES),
   DEFINE_BIT(BRW_NEW_TEXTURE_BUFFER),
   DEFINE_BIT(BRW_NEW_GEN4_UNIT_STATE),
   DEFINE_BIT(BRW_NEW_CC_VP),
   DEFINE_BIT(BRW_NEW_SF_VP),
   DEFINE_BIT(BRW_NEW_CLIP_VP),
   DEFINE_BIT(BRW_NEW_SAMPLER_STATE_TABLE),
   DEFINE_BIT(BRW_NEW_VS_ATTRIB_WORKAROUNDS),
   DEFINE_BIT(BRW_NEW_COMPUTE_PROGRAM),
   DEFINE_BIT(BRW_NEW_CS_WORK_GROUPS),
   DEFINE_BIT(BRW_NEW_URB_SIZE),
   DEFINE_BIT(BRW_NEW_CC_STATE),
   DEFINE_BIT(BRW_NEW_BLORP),
   DEFINE_BIT(BRW_NEW_VIEWPORT_COUNT),
   DEFINE_BIT(BRW_NEW_CONSERVATIVE_RASTERIZATION),
   DEFINE_BIT(BRW_NEW_DRAW_CALL),
   DEFINE_BIT(BRW_NEW_AUX_STATE),
   {0, 0, 0}
};

static void
brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].bit & bits)
         bit_map[i].count++;
   }
}

static void
brw_print_dirty_count(struct dirty_bit_map *bit_map)
{
   for (int i = 0; bit_map[i].bit != 0; i++) {
      if (bit_map[i].count > 1) {
         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
                 bit_map[i].bit, bit_map[i].count, bit_map[i].name);
      }
   }
}

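/* Tessellation is only active when a TES is bound (a TCS alone is not
 * valid in GL), so the TES program gates the upload of both stages.
 */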
static inline void
brw_upload_tess_programs(struct brw_context *brw)
{
   if (brw->programs[MESA_SHADER_TESS_EVAL]) {
      brw_upload_tcs_prog(brw);
      brw_upload_tes_prog(brw);
   } else {
      brw->tcs.base.prog_data = NULL;
      brw->tes.base.prog_data = NULL;
   }
}

static inline void
brw_upload_programs(struct brw_context *brw,
                    enum brw_pipeline pipeline)
{
   struct gl_context *ctx = &brw->ctx;
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (pipeline == BRW_RENDER_PIPELINE) {
      brw_upload_vs_prog(brw);
      brw_upload_tess_programs(brw);

      if (brw->programs[MESA_SHADER_GEOMETRY]) {
         brw_upload_gs_prog(brw);
      } else {
         brw->gs.base.prog_data = NULL;
         if (devinfo->gen < 7)
            brw_upload_ff_gs_prog(brw);
      }

      /* Update the VUE map for data exiting the GS stage of the pipeline.
       * This comes from the last enabled shader stage.
       */
      GLbitfield64 old_slots = brw->vue_map_geom_out.slots_valid;
      bool old_separate = brw->vue_map_geom_out.separate;
      struct brw_vue_prog_data *vue_prog_data;
      if (brw->programs[MESA_SHADER_GEOMETRY])
         vue_prog_data = brw_vue_prog_data(brw->gs.base.prog_data);
      else if (brw->programs[MESA_SHADER_TESS_EVAL])
         vue_prog_data = brw_vue_prog_data(brw->tes.base.prog_data);
      else
         vue_prog_data = brw_vue_prog_data(brw->vs.base.prog_data);

      brw->vue_map_geom_out = vue_prog_data->vue_map;

      /* If the layout has changed, signal BRW_NEW_VUE_MAP_GEOM_OUT. */
      if (old_slots != brw->vue_map_geom_out.slots_valid ||
          old_separate != brw->vue_map_geom_out.separate)
         brw->ctx.NewDriverState |= BRW_NEW_VUE_MAP_GEOM_OUT;

      if ((old_slots ^ brw->vue_map_geom_out.slots_valid) &
          VARYING_BIT_VIEWPORT) {
         ctx->NewDriverState |= BRW_NEW_VIEWPORT_COUNT;
         brw->clip.viewport_count =
            (brw->vue_map_geom_out.slots_valid & VARYING_BIT_VIEWPORT) ?
            ctx->Const.MaxViewports : 1;
      }

      brw_upload_wm_prog(brw);

      if (devinfo->gen < 6) {
         brw_upload_clip_prog(brw);
         brw_upload_sf_prog(brw);
      }

      brw_disk_cache_write_render_programs(brw);
   } else if (pipeline == BRW_COMPUTE_PIPELINE) {
      brw_upload_cs_prog(brw);
      brw_disk_cache_write_compute_program(brw);
   }
}

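/* Fold any dirty bits flagged since the last merge into the accumulated
 * state we are processing.
 */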
static inline void
merge_ctx_state(struct brw_context *brw,
                struct brw_state_flags *state)
{
   state->mesa |= brw->NewGLState;
   state->brw |= brw->ctx.NewDriverState;
}

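/* Emit an atom if any of its dirty bits are pending, then merge in any
 * bits the emit itself may have flagged.
 */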
static ALWAYS_INLINE void
check_and_emit_atom(struct brw_context *brw,
                    struct brw_state_flags *state,
                    const struct brw_tracked_state *atom)
{
   if (check_state(state, &atom->dirty)) {
      atom->emit(brw);
      merge_ctx_state(brw, state);
   }
}

static inline void
brw_upload_pipeline_state(struct brw_context *brw,
                          enum brw_pipeline pipeline)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   int i;
   static int dirty_count = 0;
   struct brw_state_flags state = brw->state.pipelines[pipeline];
   const unsigned fb_samples =
      MAX2(_mesa_geometric_samples(ctx->DrawBuffer), 1);

   brw_select_pipeline(brw, pipeline);

   if (pipeline == BRW_RENDER_PIPELINE && brw->current_hash_scale != 1)
      brw_emit_hashing_mode(brw, UINT_MAX, UINT_MAX, 1);

   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {
      /* Always re-emit all state. */
      brw->NewGLState = ~0;
      ctx->NewDriverState = ~0ull;
   }

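   /* Resolve the current GL program objects into brw->programs[], flagging
    * the matching BRW_NEW_* bit whenever a stage's program has changed.
    */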
   if (pipeline == BRW_RENDER_PIPELINE) {
      if (brw->programs[MESA_SHADER_FRAGMENT] !=
          ctx->FragmentProgram._Current) {
         brw->programs[MESA_SHADER_FRAGMENT] = ctx->FragmentProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_TESS_EVAL] !=
          ctx->TessEvalProgram._Current) {
         brw->programs[MESA_SHADER_TESS_EVAL] = ctx->TessEvalProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_TESS_CTRL] !=
          ctx->TessCtrlProgram._Current) {
         brw->programs[MESA_SHADER_TESS_CTRL] = ctx->TessCtrlProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_TESS_PROGRAMS;
      }

      if (brw->programs[MESA_SHADER_GEOMETRY] !=
          ctx->GeometryProgram._Current) {
         brw->programs[MESA_SHADER_GEOMETRY] = ctx->GeometryProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_GEOMETRY_PROGRAM;
      }

      if (brw->programs[MESA_SHADER_VERTEX] != ctx->VertexProgram._Current) {
         brw->programs[MESA_SHADER_VERTEX] = ctx->VertexProgram._Current;
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
      }
   }

   if (brw->programs[MESA_SHADER_COMPUTE] != ctx->ComputeProgram._Current) {
      brw->programs[MESA_SHADER_COMPUTE] = ctx->ComputeProgram._Current;
      brw->ctx.NewDriverState |= BRW_NEW_COMPUTE_PROGRAM;
   }

   if (brw->meta_in_progress != _mesa_meta_in_progress(ctx)) {
      brw->meta_in_progress = _mesa_meta_in_progress(ctx);
      brw->ctx.NewDriverState |= BRW_NEW_META_IN_PROGRESS;
   }

   if (brw->num_samples != fb_samples) {
      brw->num_samples = fb_samples;
      brw->ctx.NewDriverState |= BRW_NEW_NUM_SAMPLES;
   }

   /* Exit early if no state is flagged as dirty */
   merge_ctx_state(brw, &state);
   if ((state.mesa | state.brw) == 0)
      return;

   /* Emit Sandybridge workaround flushes on every primitive, for safety. */
   if (devinfo->gen == 6)
      brw_emit_post_sync_nonzero_flush(brw);

   brw_upload_programs(brw, pipeline);
   merge_ctx_state(brw, &state);

   brw_upload_state_base_address(brw);

   const struct brw_tracked_state *atoms =
      brw_get_pipeline_atoms(brw, pipeline);
   const int num_atoms = brw->num_atoms[pipeline];

   if (unlikely(INTEL_DEBUG)) {
      /* Debug path: enforce sanity checks on the state flags that each
       * atom generates and examines, to help ensure the state atoms are
       * ordered correctly in the list.
       */
      struct brw_state_flags examined, prev;
      memset(&examined, 0, sizeof(examined));
      prev = state;

      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];
         struct brw_state_flags generated;

         check_and_emit_atom(brw, &state, atom);

         accumulate_state(&examined, &atom->dirty);

         /* generated = (prev ^ state)
          * if (examined & generated)
          *    fail;
          */
         xor_states(&generated, &prev, &state);
         assert(!check_state(&examined, &generated));
         prev = state;
      }
   } else {
      for (i = 0; i < num_atoms; i++) {
         const struct brw_tracked_state *atom = &atoms[i];

         check_and_emit_atom(brw, &state, atom);
      }
   }

   if (unlikely(INTEL_DEBUG & DEBUG_STATE)) {
      STATIC_ASSERT(ARRAY_SIZE(brw_bits) == BRW_NUM_STATE_BITS + 1);

      brw_update_dirty_count(mesa_bits, state.mesa);
      brw_update_dirty_count(brw_bits, state.brw);
      if (dirty_count++ % 1000 == 0) {
         brw_print_dirty_count(mesa_bits);
         brw_print_dirty_count(brw_bits);
         fprintf(stderr, "\n");
      }
   }
}

/***********************************************************************
 * Emit all state:
 */
void brw_upload_render_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_RENDER_PIPELINE);
}

static inline void
brw_pipeline_state_finished(struct brw_context *brw,
                            enum brw_pipeline pipeline)
{
   /* Save all dirty state into the other pipelines */
   for (unsigned i = 0; i < BRW_NUM_PIPELINES; i++) {
      if (i != pipeline) {
         brw->state.pipelines[i].mesa |= brw->NewGLState;
         brw->state.pipelines[i].brw |= brw->ctx.NewDriverState;
      } else {
         memset(&brw->state.pipelines[i], 0, sizeof(struct brw_state_flags));
      }
   }

   brw->NewGLState = 0;
   brw->ctx.NewDriverState = 0ull;
}

/**
 * Clear dirty bits to account for the fact that the state emitted by
 * brw_upload_render_state() has been committed to the hardware.  This is a
 * separate call from brw_upload_render_state() because it's possible that
 * after the call to brw_upload_render_state(), we will discover that we've
 * run out of aperture space, and need to rewind the batch buffer to the
 * state it had before the brw_upload_render_state() call.
 */
void
brw_render_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_RENDER_PIPELINE);
}

void
brw_upload_compute_state(struct brw_context *brw)
{
   brw_upload_pipeline_state(brw, BRW_COMPUTE_PIPELINE);
}

void
brw_compute_state_finished(struct brw_context *brw)
{
   brw_pipeline_state_finished(brw, BRW_COMPUTE_PIPELINE);
}