i965: Use atomic ops in get_new_program_id().
[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32 #include <pthread.h>
33 #include "main/imports.h"
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 #include "program/prog_to_nir.h"
37 #include "program/program.h"
38 #include "program/programopt.h"
39 #include "tnl/tnl.h"
40 #include "util/ralloc.h"
41 #include "compiler/glsl/ir.h"
42 #include "compiler/glsl/glsl_to_nir.h"
43
44 #include "brw_program.h"
45 #include "brw_context.h"
46 #include "compiler/brw_nir.h"
47 #include "brw_defines.h"
48 #include "intel_batchbuffer.h"
49
50 static bool
51 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
52 {
53 if (is_scalar) {
54 nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
55 type_size_scalar_bytes);
56 return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
57 } else {
58 nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
59 type_size_vec4_bytes);
60 return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
61 }
62 }
63
64 nir_shader *
65 brw_create_nir(struct brw_context *brw,
66 const struct gl_shader_program *shader_prog,
67 struct gl_program *prog,
68 gl_shader_stage stage,
69 bool is_scalar)
70 {
71 struct gl_context *ctx = &brw->ctx;
72 const nir_shader_compiler_options *options =
73 ctx->Const.ShaderCompilerOptions[stage].NirOptions;
74 bool progress;
75 nir_shader *nir;
76
77 /* First, lower the GLSL IR or Mesa IR to NIR */
78 if (shader_prog) {
79 nir = glsl_to_nir(shader_prog, stage, options);
80 nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
81 nir_lower_returns(nir);
82 nir_validate_shader(nir);
83 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
84 nir_shader_get_entrypoint(nir), true, false);
85 } else {
86 nir = prog_to_nir(prog, options);
87 NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
88 }
89 nir_validate_shader(nir);
90
91 (void)progress;
92
93 nir = brw_preprocess_nir(brw->screen->compiler, nir);
94
95 if (stage == MESA_SHADER_FRAGMENT) {
96 static const struct nir_lower_wpos_ytransform_options wpos_options = {
97 .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
98 .fs_coord_pixel_center_integer = 1,
99 .fs_coord_origin_upper_left = 1,
100 };
101 _mesa_add_state_reference(prog->Parameters,
102 (gl_state_index *) wpos_options.state_tokens);
103
104 NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
105 }
106
107 NIR_PASS(progress, nir, nir_lower_system_values);
108 NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
109
110 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
111
112 /* Copy the info we just generated back into the gl_program */
113 const char *prog_name = prog->info.name;
114 const char *prog_label = prog->info.label;
115 prog->info = nir->info;
116 prog->info.name = prog_name;
117 prog->info.label = prog_label;
118
119 if (shader_prog) {
120 NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
121 NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
122 }
123
124 return nir;
125 }
126
127 static unsigned
128 get_new_program_id(struct intel_screen *screen)
129 {
130 return p_atomic_inc_return(&screen->program_id);
131 }
132
133 static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
134 GLuint id, bool is_arb_asm)
135 {
136 struct brw_context *brw = brw_context(ctx);
137 struct brw_program *prog = rzalloc(NULL, struct brw_program);
138
139 if (prog) {
140 prog->id = get_new_program_id(brw->screen);
141
142 return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
143 }
144
145 return NULL;
146 }
147
148 static void brwDeleteProgram( struct gl_context *ctx,
149 struct gl_program *prog )
150 {
151 struct brw_context *brw = brw_context(ctx);
152
153 /* Beware! prog's refcount has reached zero, and it's about to be freed.
154 *
155 * In brw_upload_pipeline_state(), we compare brw->foo_program to
156 * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
157 * pointer has changed.
158 *
159 * We cannot leave brw->foo_program as a dangling pointer to the dead
160 * program. malloc() may allocate the same memory for a new gl_program,
161 * causing us to see matching pointers...but totally different programs.
162 *
163 * We cannot set brw->foo_program to NULL, either. If we've deleted the
164 * active program, Mesa may set ctx->FooProgram._Current to NULL. That
165 * would cause us to see matching pointers (NULL == NULL), and fail to
166 * detect that a program has changed since our last draw.
167 *
168 * So, set it to a bogus gl_program pointer that will never match,
169 * causing us to properly reevaluate the state on our next draw.
170 *
171 * Getting this wrong causes heisenbugs which are very hard to catch,
172 * as you need a very specific allocation pattern to hit the problem.
173 */
174 static const struct gl_program deleted_program;
175
176 if (brw->vertex_program == prog)
177 brw->vertex_program = &deleted_program;
178
179 if (brw->tess_ctrl_program == prog)
180 brw->tess_ctrl_program = &deleted_program;
181
182 if (brw->tess_eval_program == prog)
183 brw->tess_eval_program = &deleted_program;
184
185 if (brw->geometry_program == prog)
186 brw->geometry_program = &deleted_program;
187
188 if (brw->fragment_program == prog)
189 brw->fragment_program = &deleted_program;
190
191 if (brw->compute_program == prog)
192 brw->compute_program = &deleted_program;
193
194 _mesa_delete_program( ctx, prog );
195 }
196
197
198 static GLboolean
199 brwProgramStringNotify(struct gl_context *ctx,
200 GLenum target,
201 struct gl_program *prog)
202 {
203 assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
204
205 struct brw_context *brw = brw_context(ctx);
206 const struct brw_compiler *compiler = brw->screen->compiler;
207
208 switch (target) {
209 case GL_FRAGMENT_PROGRAM_ARB: {
210 struct brw_program *newFP = brw_program(prog);
211 const struct brw_program *curFP =
212 brw_program_const(brw->fragment_program);
213
214 if (newFP == curFP)
215 brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
216 newFP->id = get_new_program_id(brw->screen);
217
218 prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
219
220 brw_fs_precompile(ctx, prog);
221 break;
222 }
223 case GL_VERTEX_PROGRAM_ARB: {
224 struct brw_program *newVP = brw_program(prog);
225 const struct brw_program *curVP =
226 brw_program_const(brw->vertex_program);
227
228 if (newVP == curVP)
229 brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
230 if (newVP->program.arb.IsPositionInvariant) {
231 _mesa_insert_mvp_code(ctx, &newVP->program);
232 }
233 newVP->id = get_new_program_id(brw->screen);
234
235 /* Also tell tnl about it:
236 */
237 _tnl_program_string(ctx, target, prog);
238
239 prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
240 compiler->scalar_stage[MESA_SHADER_VERTEX]);
241
242 brw_vs_precompile(ctx, prog);
243 break;
244 }
245 default:
246 /*
247 * driver->ProgramStringNotify is only called for ARB programs, fixed
248 * function vertex programs, and ir_to_mesa (which isn't used by the
249 * i965 back-end). Therefore, even after geometry shaders are added,
250 * this function should only ever be called with a target of
251 * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
252 */
253 unreachable("Unexpected target in brwProgramStringNotify");
254 }
255
256 return true;
257 }
258
259 static void
260 brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
261 {
262 struct brw_context *brw = brw_context(ctx);
263 const struct gen_device_info *devinfo = &brw->screen->devinfo;
264 unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
265 PIPE_CONTROL_NO_WRITE |
266 PIPE_CONTROL_CS_STALL);
267 assert(devinfo->gen >= 7 && devinfo->gen <= 10);
268
269 if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
270 GL_ELEMENT_ARRAY_BARRIER_BIT |
271 GL_COMMAND_BARRIER_BIT))
272 bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
273
274 if (barriers & GL_UNIFORM_BARRIER_BIT)
275 bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
276 PIPE_CONTROL_CONST_CACHE_INVALIDATE);
277
278 if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
279 bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
280
281 if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
282 bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
283
284 if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
285 bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
286 PIPE_CONTROL_RENDER_TARGET_FLUSH);
287
288 /* Typed surface messages are handled by the render cache on IVB, so we
289 * need to flush it too.
290 */
291 if (devinfo->gen == 7 && !devinfo->is_haswell)
292 bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
293
294 brw_emit_pipe_control_flush(brw, bits);
295 }
296
297 static void
298 brw_blend_barrier(struct gl_context *ctx)
299 {
300 struct brw_context *brw = brw_context(ctx);
301 const struct gen_device_info *devinfo = &brw->screen->devinfo;
302
303 if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
304 if (devinfo->gen >= 6) {
305 brw_emit_pipe_control_flush(brw,
306 PIPE_CONTROL_RENDER_TARGET_FLUSH |
307 PIPE_CONTROL_CS_STALL);
308 brw_emit_pipe_control_flush(brw,
309 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
310 } else {
311 brw_emit_pipe_control_flush(brw,
312 PIPE_CONTROL_RENDER_TARGET_FLUSH);
313 }
314 }
315 }
316
317 void
318 brw_get_scratch_bo(struct brw_context *brw,
319 struct brw_bo **scratch_bo, int size)
320 {
321 struct brw_bo *old_bo = *scratch_bo;
322
323 if (old_bo && old_bo->size < size) {
324 brw_bo_unreference(old_bo);
325 old_bo = NULL;
326 }
327
328 if (!old_bo) {
329 *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
330 }
331 }
332
333 /**
334 * Reserve enough scratch space for the given stage to hold \p per_thread_size
335 * bytes times the given \p thread_count.
336 */
337 void
338 brw_alloc_stage_scratch(struct brw_context *brw,
339 struct brw_stage_state *stage_state,
340 unsigned per_thread_size,
341 unsigned thread_count)
342 {
343 if (stage_state->per_thread_scratch < per_thread_size) {
344 stage_state->per_thread_scratch = per_thread_size;
345
346 if (stage_state->scratch_bo)
347 brw_bo_unreference(stage_state->scratch_bo);
348
349 stage_state->scratch_bo =
350 brw_bo_alloc(brw->bufmgr, "shader scratch space",
351 per_thread_size * thread_count, 4096);
352 }
353 }
354
355 void brwInitFragProgFuncs( struct dd_function_table *functions )
356 {
357 assert(functions->ProgramStringNotify == _tnl_program_string);
358
359 functions->NewProgram = brwNewProgram;
360 functions->DeleteProgram = brwDeleteProgram;
361 functions->ProgramStringNotify = brwProgramStringNotify;
362
363 functions->LinkShader = brw_link_shader;
364
365 functions->MemoryBarrier = brw_memory_barrier;
366 functions->BlendBarrier = brw_blend_barrier;
367 }
368
/**
 * Running totals for one shader-time entry.  brw_collect_shader_time() adds
 * the three counters read from the shader-time buffer into these fields and
 * brw_report_shader_time() combines them into a scaled time.
 */
struct shader_times {
   /* Sum of the "time" counter. */
   uint64_t time;
   /* Sum of the "written" counter. */
   uint64_t written;
   /* Sum of the "reset" counter. */
   uint64_t reset;
};
374
375 void
376 brw_init_shader_time(struct brw_context *brw)
377 {
378 const int max_entries = 2048;
379 brw->shader_time.bo =
380 brw_bo_alloc(brw->bufmgr, "shader time",
381 max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
382 brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
383 brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
384 brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
385 max_entries);
386 brw->shader_time.cumulative = rzalloc_array(brw, struct shader_times,
387 max_entries);
388 brw->shader_time.max_entries = max_entries;
389 }
390
/**
 * qsort() comparator for an array of pointers to uint64_t values; orders the
 * pointers by ascending pointed-to value.
 *
 * Returns -1, 0 or 1.  The values are compared rather than subtracted
 * because a 64-bit difference does not fit in the int result.
 */
static int
compare_time(const void *a, const void *b)
{
   const uint64_t lhs = **(uint64_t * const *)a;
   const uint64_t rhs = **(uint64_t * const *)b;

   return (lhs > rhs) - (lhs < rhs);
}
405
/**
 * Print one row of the shader-time report to stderr.
 *
 * \param stage       text for the first column.
 * \param name        text for the second column.
 * \param shader_num  number shown before the colon; 0 leaves it out.
 * \param time        cycle count for this row.
 * \param total       overall cycle count used for the percentage column;
 *                    callers must pass a non-zero value.
 */
static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   fprintf(stderr, "%-6s%-18s", stage, name);

   if (shader_num != 0)
      fprintf(stderr, "%4d: ", shader_num);
   else
      fprintf(stderr, " : ");

   /* The cycle count is unsigned: print it as such so that values above
    * INT64_MAX do not come out negative.
    */
   fprintf(stderr, "%16llu (%7.2f Gcycles) %4.1f%%\n",
           (unsigned long long)time,
           (double)time / 1000000000.0,
           (double)time / total * 100.0);
}
422
423 static void
424 brw_report_shader_time(struct brw_context *brw)
425 {
426 if (!brw->shader_time.bo || !brw->shader_time.num_entries)
427 return;
428
429 uint64_t scaled[brw->shader_time.num_entries];
430 uint64_t *sorted[brw->shader_time.num_entries];
431 uint64_t total_by_type[ST_CS + 1];
432 memset(total_by_type, 0, sizeof(total_by_type));
433 double total = 0;
434 for (int i = 0; i < brw->shader_time.num_entries; i++) {
435 uint64_t written = 0, reset = 0;
436 enum shader_time_shader_type type = brw->shader_time.types[i];
437
438 sorted[i] = &scaled[i];
439
440 switch (type) {
441 case ST_VS:
442 case ST_TCS:
443 case ST_TES:
444 case ST_GS:
445 case ST_FS8:
446 case ST_FS16:
447 case ST_CS:
448 written = brw->shader_time.cumulative[i].written;
449 reset = brw->shader_time.cumulative[i].reset;
450 break;
451
452 default:
453 /* I sometimes want to print things that aren't the 3 shader times.
454 * Just print the sum in that case.
455 */
456 written = 1;
457 reset = 0;
458 break;
459 }
460
461 uint64_t time = brw->shader_time.cumulative[i].time;
462 if (written) {
463 scaled[i] = time / written * (written + reset);
464 } else {
465 scaled[i] = time;
466 }
467
468 switch (type) {
469 case ST_VS:
470 case ST_TCS:
471 case ST_TES:
472 case ST_GS:
473 case ST_FS8:
474 case ST_FS16:
475 case ST_CS:
476 total_by_type[type] += scaled[i];
477 break;
478 default:
479 break;
480 }
481
482 total += scaled[i];
483 }
484
485 if (total == 0) {
486 fprintf(stderr, "No shader time collected yet\n");
487 return;
488 }
489
490 qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
491
492 fprintf(stderr, "\n");
493 fprintf(stderr, "type ID cycles spent %% of total\n");
494 for (int s = 0; s < brw->shader_time.num_entries; s++) {
495 const char *stage;
496 /* Work back from the sorted pointers times to a time to print. */
497 int i = sorted[s] - scaled;
498
499 if (scaled[i] == 0)
500 continue;
501
502 int shader_num = brw->shader_time.ids[i];
503 const char *shader_name = brw->shader_time.names[i];
504
505 switch (brw->shader_time.types[i]) {
506 case ST_VS:
507 stage = "vs";
508 break;
509 case ST_TCS:
510 stage = "tcs";
511 break;
512 case ST_TES:
513 stage = "tes";
514 break;
515 case ST_GS:
516 stage = "gs";
517 break;
518 case ST_FS8:
519 stage = "fs8";
520 break;
521 case ST_FS16:
522 stage = "fs16";
523 break;
524 case ST_CS:
525 stage = "cs";
526 break;
527 default:
528 stage = "other";
529 break;
530 }
531
532 print_shader_time_line(stage, shader_name, shader_num,
533 scaled[i], total);
534 }
535
536 fprintf(stderr, "\n");
537 print_shader_time_line("total", "vs", 0, total_by_type[ST_VS], total);
538 print_shader_time_line("total", "tcs", 0, total_by_type[ST_TCS], total);
539 print_shader_time_line("total", "tes", 0, total_by_type[ST_TES], total);
540 print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
541 print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
542 print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
543 print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
544 }
545
546 static void
547 brw_collect_shader_time(struct brw_context *brw)
548 {
549 if (!brw->shader_time.bo)
550 return;
551
552 /* This probably stalls on the last rendering. We could fix that by
553 * delaying reading the reports, but it doesn't look like it's a big
554 * overhead compared to the cost of tracking the time in the first place.
555 */
556 void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
557
558 for (int i = 0; i < brw->shader_time.num_entries; i++) {
559 uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
560
561 brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
562 brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
563 brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
564 }
565
566 /* Zero the BO out to clear it out for our next collection.
567 */
568 memset(bo_map, 0, brw->shader_time.bo->size);
569 brw_bo_unmap(brw->shader_time.bo);
570 }
571
572 void
573 brw_collect_and_report_shader_time(struct brw_context *brw)
574 {
575 brw_collect_shader_time(brw);
576
577 if (brw->shader_time.report_time == 0 ||
578 get_time() - brw->shader_time.report_time >= 1.0) {
579 brw_report_shader_time(brw);
580 brw->shader_time.report_time = get_time();
581 }
582 }
583
584 /**
585 * Chooses an index in the shader_time buffer and sets up tracking information
586 * for our printouts.
587 *
588 * Note that this holds on to references to the underlying programs, which may
589 * change their lifetimes compared to normal operation.
590 */
591 int
592 brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
593 enum shader_time_shader_type type, bool is_glsl_sh)
594 {
595 int shader_time_index = brw->shader_time.num_entries++;
596 assert(shader_time_index < brw->shader_time.max_entries);
597 brw->shader_time.types[shader_time_index] = type;
598
599 const char *name;
600 if (prog->Id == 0) {
601 name = "ff";
602 } else if (is_glsl_sh) {
603 name = prog->info.label ?
604 ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
605 } else {
606 name = "prog";
607 }
608
609 brw->shader_time.names[shader_time_index] = name;
610 brw->shader_time.ids[shader_time_index] = prog->Id;
611
612 return shader_time_index;
613 }
614
615 void
616 brw_destroy_shader_time(struct brw_context *brw)
617 {
618 brw_bo_unreference(brw->shader_time.bo);
619 brw->shader_time.bo = NULL;
620 }
621
622 void
623 brw_stage_prog_data_free(const void *p)
624 {
625 struct brw_stage_prog_data *prog_data = (struct brw_stage_prog_data *)p;
626
627 ralloc_free(prog_data->param);
628 ralloc_free(prog_data->pull_param);
629 ralloc_free(prog_data->image_param);
630 }
631
632 void
633 brw_dump_arb_asm(const char *stage, struct gl_program *prog)
634 {
635 fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
636 stage, prog->Id, stage);
637 _mesa_print_program(prog);
638 }
639
640 void
641 brw_setup_tex_for_precompile(struct brw_context *brw,
642 struct brw_sampler_prog_key_data *tex,
643 struct gl_program *prog)
644 {
645 const struct gen_device_info *devinfo = &brw->screen->devinfo;
646 const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
647 unsigned sampler_count = util_last_bit(prog->SamplersUsed);
648 for (unsigned i = 0; i < sampler_count; i++) {
649 if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
650 /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
651 tex->swizzles[i] =
652 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
653 } else {
654 /* Color sampler: assume no swizzling. */
655 tex->swizzles[i] = SWIZZLE_XYZW;
656 }
657 }
658 }
659
660 /**
661 * Sets up the starting offsets for the groups of binding table entries
662 * common to all pipeline stages.
663 *
664 * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
665 * unused but also make sure that addition of small offsets to them will
666 * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
667 */
668 uint32_t
669 brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
670 const struct gl_program *prog,
671 struct brw_stage_prog_data *stage_prog_data,
672 uint32_t next_binding_table_offset)
673 {
674 int num_textures = util_last_bit(prog->SamplersUsed);
675
676 stage_prog_data->binding_table.texture_start = next_binding_table_offset;
677 next_binding_table_offset += num_textures;
678
679 if (prog->info.num_ubos) {
680 assert(prog->info.num_ubos <= BRW_MAX_UBO);
681 stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
682 next_binding_table_offset += prog->info.num_ubos;
683 } else {
684 stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
685 }
686
687 if (prog->info.num_ssbos) {
688 assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
689 stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
690 next_binding_table_offset += prog->info.num_ssbos;
691 } else {
692 stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
693 }
694
695 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
696 stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
697 next_binding_table_offset++;
698 } else {
699 stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
700 }
701
702 if (prog->nir->info.uses_texture_gather) {
703 if (devinfo->gen >= 8) {
704 stage_prog_data->binding_table.gather_texture_start =
705 stage_prog_data->binding_table.texture_start;
706 } else {
707 stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
708 next_binding_table_offset += num_textures;
709 }
710 } else {
711 stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
712 }
713
714 if (prog->info.num_abos) {
715 stage_prog_data->binding_table.abo_start = next_binding_table_offset;
716 next_binding_table_offset += prog->info.num_abos;
717 } else {
718 stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
719 }
720
721 if (prog->info.num_images) {
722 stage_prog_data->binding_table.image_start = next_binding_table_offset;
723 next_binding_table_offset += prog->info.num_images;
724 } else {
725 stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
726 }
727
728 /* This may or may not be used depending on how the compile goes. */
729 stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
730 next_binding_table_offset++;
731
732 /* Plane 0 is just the regular texture section */
733 stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
734
735 stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
736 next_binding_table_offset += num_textures;
737
738 stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
739 next_binding_table_offset += num_textures;
740
741 /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
742
743 assert(next_binding_table_offset <= BRW_MAX_SURFACES);
744 return next_binding_table_offset;
745 }