*/
#include <pthread.h>
-#include "main/imports.h"
+#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_to_nir.h"
#include "tnl/tnl.h"
#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
+#include "compiler/glsl/program.h"
+#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/glsl_to_nir.h"
#include "brw_program.h"
#include "brw_defines.h"
#include "intel_batchbuffer.h"
+#include "brw_cs.h"
+#include "brw_gs.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+#include "brw_state.h"
+
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+
static bool
brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
{
}
}
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+ gl_shader_stage stage,
+ GLuint id, bool is_arb_asm);
+
+/**
+ * Create the NIR shader for one stage of a program.
+ *
+ * When shader_prog is non-NULL the NIR is produced from SPIR-V or from GLSL
+ * IR (depending on shader_prog->data->spirv); otherwise it is translated
+ * from the Mesa IR program with prog_to_nir(). The result is then handed to
+ * brw_preprocess_nir() and to the stage-specific lowering below:
+ * patch-vertices lowering for the tessellation stages and the wpos
+ * y-transform for fragment shaders.
+ *
+ * NOTE(review): the body uses `prog`, which is not in the parameter list as
+ * shown here -- presumably a parameter line is missing from this view;
+ * confirm against the full file.
+ * NOTE(review): the MESA_SHADER_TESS_EVAL path dereferences shader_prog
+ * unconditionally, so it assumes a linked program for that stage -- confirm.
+ */
nir_shader *
brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
gl_shader_stage stage,
bool is_scalar)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- bool progress;
nir_shader *nir;
- /* First, lower the GLSL IR or Mesa IR to NIR */
+ /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
if (shader_prog) {
- nir = glsl_to_nir(shader_prog, stage, options);
- nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
- nir_lower_returns(nir);
- nir_validate_shader(nir);
+ if (shader_prog->data->spirv) {
+ nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
+ } else {
+ nir = glsl_to_nir(ctx, shader_prog, stage, options);
+
+ /* Remap the locations to slots so those requiring two slots will
+ * occupy two locations. For instance, if we have in the IR code a
+ * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
+ * will use locations/slots 0 and 1, and attr1 will use location/slot 2
+ */
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
+ }
+ assert (nir);
+
+ nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
+ NULL);
+ nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, false);
} else {
nir = prog_to_nir(prog, options);
NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
}
- nir_validate_shader(nir);
+ nir_validate_shader(nir, "before brw_preprocess_nir");
+
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ if (!ctx->SoftFP64 && nir->info.uses_64bit &&
+ (options->lower_doubles_options & nir_lower_fp64_full_software)) {
+ ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
+ }
- (void)progress;
+ brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);
- nir = brw_preprocess_nir(brw->screen->compiler, nir);
+ if (stage == MESA_SHADER_TESS_CTRL) {
+ /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gen8+. */
+ static const gl_state_index16 tokens[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN };
+ nir_lower_patch_vertices(nir, 0, devinfo->gen >= 8 ? tokens : NULL);
+ }
+
+ if (stage == MESA_SHADER_TESS_EVAL) {
+ /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
+ * a uniform if we don't.
+ */
+ struct gl_linked_shader *tcs =
+ shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+ uint32_t static_patch_vertices =
+ tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
+ static const gl_state_index16 tokens[STATE_LENGTH] =
+ { STATE_INTERNAL, STATE_TES_PATCH_VERTICES_IN };
+ nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
+ }
if (stage == MESA_SHADER_FRAGMENT) {
static const struct nir_lower_wpos_ytransform_options wpos_options = {
.fs_coord_pixel_center_integer = 1,
.fs_coord_origin_upper_left = 1,
};
- _mesa_add_state_reference(prog->Parameters,
- (gl_state_index *) wpos_options.state_tokens);
+ bool progress = false;
NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
+ if (progress) {
+ _mesa_add_state_reference(prog->Parameters,
+ wpos_options.state_tokens);
+ }
}
- NIR_PASS(progress, nir, nir_lower_system_values);
- NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
+ return nir;
+}
- nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+/**
+ * Report the size and alignment of a scalar or vector type, in bytes.
+ *
+ * Booleans take 4 bytes per component; every other type takes its bit size
+ * divided by 8. The size is the component size times the number of
+ * components. The alignment is computed the same way, except that a
+ * 3-component vector is aligned like a 4-component one.
+ *
+ * type must be a scalar or a vector (asserted). The results are written
+ * through size and align.
+ */
+static void
+shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+ assert(glsl_type_is_vector_or_scalar(type));
- /* nir_shader may have been cloned so make sure shader_info is in sync */
- if (nir->info != &prog->info) {
- const char *name = prog->info.name;
- const char *label = prog->info.label;
- prog->info = *nir->info;
- prog->info.name = name;
- prog->info.label = label;
- }
+ uint32_t comp_size = glsl_type_is_boolean(type)
+ ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ *size = comp_size * length;
+ *align = comp_size * (length == 3 ? 4 : length);
+}
- if (shader_prog) {
- NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
- NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
+/**
+ * Run the resource-lowering passes on a program's NIR.
+ *
+ * In order: uniform lowering, sampler lowering (after which textures_used
+ * and textures_used_by_txf are copied from prog->nir->info into prog->info),
+ * image load/store lowering, explicit shared-variable types and I/O for
+ * SPIR-V compute shaders, buffer lowering, and a round of constant folding.
+ *
+ * NOTE(review): the first pass runs on `nir` while the rest use
+ * `prog->nir`; the callers visible in this file pass prog->nir as `nir`,
+ * so these are presumably always the same shader -- confirm.
+ * NOTE(review): callers visible in this file pass shader_prog == NULL for
+ * ARB vertex/fragment programs; this function itself dereferences
+ * shader_prog only on the compute path.
+ */
+void
+brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ const struct gen_device_info *devinfo)
+{
+ NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
+ NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
+ prog->info.textures_used = prog->nir->info.textures_used;
+ prog->info.textures_used_by_txf = prog->nir->info.textures_used_by_txf;
+
+ NIR_PASS_V(prog->nir, brw_nir_lower_image_load_store, devinfo, NULL);
+
+ if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
+ shader_prog->data->spirv) {
+ NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
+ nir_var_mem_shared, shared_type_info);
+ NIR_PASS_V(prog->nir, nir_lower_explicit_io,
+ nir_var_mem_shared, nir_address_format_32bit_offset);
}
- return nir;
+ NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
+ /* Do a round of constant folding to clean up address calculations */
+ NIR_PASS_V(prog->nir, nir_opt_constant_folding);
+}
+
+/**
+ * Regather shader info on the NIR entrypoint and mirror it into the
+ * gl_program, leaving the program's own name and label untouched.
+ */
+void
+brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
+{
+ nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+ /* The wholesale struct copy below would clobber these two fields, so
+ * stash them first and put them back afterwards.
+ */
+ const char *const saved_label = prog->info.label;
+ const char *const saved_name = prog->info.name;
+
+ prog->info = nir->info;
+ prog->info.label = saved_label;
+ prog->info.name = saved_name;
}
+/**
+ * Hand out a fresh program id by calling p_atomic_inc_return() on
+ * screen->program_id.
+ *
+ * NOTE(review): the removed mutex-based code returned the value from before
+ * the increment; p_atomic_inc_return() presumably returns the value after
+ * it, which would shift the ids by one -- confirm this is intended.
+ */
static unsigned
get_new_program_id(struct intel_screen *screen)
{
- static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
- pthread_mutex_lock(&m);
- unsigned id = screen->program_id++;
- pthread_mutex_unlock(&m);
- return id;
+ return p_atomic_inc_return(&screen->program_id);
}
+/**
+ * Allocate a brw_program for the given shader stage.
+ *
+ * The driver-side id comes from get_new_program_id(); the embedded
+ * gl_program is initialized by _mesa_init_gl_program(), whose result is
+ * returned. Returns NULL when the rzalloc() allocation fails.
+ */
-static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+ gl_shader_stage stage,
GLuint id, bool is_arb_asm)
{
struct brw_context *brw = brw_context(ctx);
+ struct brw_program *prog = rzalloc(NULL, struct brw_program);
- switch (target) {
- case GL_VERTEX_PROGRAM_ARB:
- case GL_TESS_CONTROL_PROGRAM_NV:
- case GL_TESS_EVALUATION_PROGRAM_NV:
- case GL_GEOMETRY_PROGRAM_NV:
- case GL_COMPUTE_PROGRAM_NV: {
- struct brw_program *prog = rzalloc(NULL, struct brw_program);
- if (prog) {
- prog->id = get_new_program_id(brw->screen);
-
- return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
- }
- else
- return NULL;
- }
+ if (prog) {
+ prog->id = get_new_program_id(brw->screen);
- case GL_FRAGMENT_PROGRAM_ARB: {
- struct brw_program *prog = rzalloc(NULL, struct brw_program);
-
- if (prog) {
- prog->id = get_new_program_id(brw->screen);
-
- return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
- }
- else
- return NULL;
+ return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
}
- default:
- unreachable("Unsupported target in brwNewProgram()");
- }
+ return NULL;
}
static void brwDeleteProgram( struct gl_context *ctx,
/* Beware! prog's refcount has reached zero, and it's about to be freed.
*
- * In brw_upload_pipeline_state(), we compare brw->foo_program to
+ * In brw_upload_pipeline_state(), we compare brw->programs[i] to
* ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
* pointer has changed.
*
- * We cannot leave brw->foo_program as a dangling pointer to the dead
+ * We cannot leave brw->programs[i] as a dangling pointer to the dead
* program. malloc() may allocate the same memory for a new gl_program,
* causing us to see matching pointers...but totally different programs.
*
- * We cannot set brw->foo_program to NULL, either. If we've deleted the
+ * We cannot set brw->programs[i] to NULL, either. If we've deleted the
* active program, Mesa may set ctx->FooProgram._Current to NULL. That
* would cause us to see matching pointers (NULL == NULL), and fail to
* detect that a program has changed since our last draw.
*/
static const struct gl_program deleted_program;
- if (brw->vertex_program == prog)
- brw->vertex_program = &deleted_program;
-
- if (brw->tess_ctrl_program == prog)
- brw->tess_ctrl_program = &deleted_program;
-
- if (brw->tess_eval_program == prog)
- brw->tess_eval_program = &deleted_program;
-
- if (brw->geometry_program == prog)
- brw->geometry_program = &deleted_program;
-
- if (brw->fragment_program == prog)
- brw->fragment_program = &deleted_program;
-
- if (brw->compute_program == prog)
- brw->compute_program = &deleted_program;
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (brw->programs[i] == prog)
+ brw->programs[i] = (struct gl_program *) &deleted_program;
+ }
_mesa_delete_program( ctx, prog );
}
case GL_FRAGMENT_PROGRAM_ARB: {
struct brw_program *newFP = brw_program(prog);
const struct brw_program *curFP =
- brw_program_const(brw->fragment_program);
+ brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);
if (newFP == curFP)
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
+ _mesa_program_fragment_position_to_sysval(&newFP->program);
newFP->id = get_new_program_id(brw->screen);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
+ brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
+ brw_shader_gather_info(prog->nir, prog);
+
brw_fs_precompile(ctx, prog);
break;
}
case GL_VERTEX_PROGRAM_ARB: {
struct brw_program *newVP = brw_program(prog);
const struct brw_program *curVP =
- brw_program_const(brw->vertex_program);
+ brw_program_const(brw->programs[MESA_SHADER_VERTEX]);
if (newVP == curVP)
brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
compiler->scalar_stage[MESA_SHADER_VERTEX]);
+ brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
+ brw_shader_gather_info(prog->nir, prog);
+
brw_vs_precompile(ctx, prog);
break;
}
brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
{
struct brw_context *brw = brw_context(ctx);
- unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
- PIPE_CONTROL_NO_WRITE |
- PIPE_CONTROL_CS_STALL);
- assert(brw->gen >= 7 && brw->gen <= 9);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
+ assert(devinfo->gen >= 7 && devinfo->gen <= 11);
if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
GL_ELEMENT_ARRAY_BARRIER_BIT |
if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
- if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
- bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+ if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
+ GL_PIXEL_BUFFER_BARRIER_BIT))
+ bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+ PIPE_CONTROL_RENDER_TARGET_FLUSH);
if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
- bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+ bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_RENDER_TARGET_FLUSH);
/* Typed surface messages are handled by the render cache on IVB, so we
* need to flush it too.
*/
- if (brw->gen == 7 && !brw->is_haswell)
+ if (devinfo->gen == 7 && !devinfo->is_haswell)
bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
brw_emit_pipe_control_flush(brw, bits);
}
static void
-brw_blend_barrier(struct gl_context *ctx)
+brw_framebuffer_fetch_barrier(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
- if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
- if (brw->gen >= 6) {
+ if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
+ if (devinfo->gen >= 6) {
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
+/**
+ * Make sure *scratch_bo points at a buffer whose size is at least `size`.
+ *
+ * An existing buffer that is too small is unreferenced and replaced by a
+ * newly allocated one; a buffer that is already large enough is left alone.
+ */
void
brw_get_scratch_bo(struct brw_context *brw,
- drm_intel_bo **scratch_bo, int size)
+ struct brw_bo **scratch_bo, int size)
{
- drm_intel_bo *old_bo = *scratch_bo;
+ struct brw_bo *old_bo = *scratch_bo;
if (old_bo && old_bo->size < size) {
- drm_intel_bo_unreference(old_bo);
+ brw_bo_unreference(old_bo);
old_bo = NULL;
}
if (!old_bo) {
- *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
+ *scratch_bo =
+ brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
}
}
+/**
+ * Grow a stage's scratch buffer so it holds per_thread_size for every
+ * thread of that stage.
+ *
+ * Does nothing when stage_state->per_thread_scratch is already at least
+ * per_thread_size. Otherwise the recorded per-thread size is updated, any
+ * existing scratch BO is unreferenced, and a new BO of
+ * per_thread_size * thread_count is allocated. thread_count is the
+ * device's maximum thread count for the stage; for compute it is the number
+ * of scratch ids per subslice times the subslice count worked out below.
+ */
void
brw_alloc_stage_scratch(struct brw_context *brw,
struct brw_stage_state *stage_state,
- unsigned per_thread_size,
- unsigned thread_count)
+ unsigned per_thread_size)
{
- if (stage_state->per_thread_scratch < per_thread_size) {
- stage_state->per_thread_scratch = per_thread_size;
+ if (stage_state->per_thread_scratch >= per_thread_size)
+ return;
+
+ stage_state->per_thread_scratch = per_thread_size;
- if (stage_state->scratch_bo)
- drm_intel_bo_unreference(stage_state->scratch_bo);
+ if (stage_state->scratch_bo)
+ brw_bo_unreference(stage_state->scratch_bo);
- stage_state->scratch_bo =
- drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
- per_thread_size * thread_count, 4096);
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ unsigned thread_count;
+ switch(stage_state->stage) {
+ case MESA_SHADER_VERTEX:
+ thread_count = devinfo->max_vs_threads;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ thread_count = devinfo->max_tcs_threads;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ thread_count = devinfo->max_tes_threads;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ thread_count = devinfo->max_gs_threads;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ thread_count = devinfo->max_wm_threads;
+ break;
+ case MESA_SHADER_COMPUTE: {
+ unsigned subslices = MAX2(brw->screen->subslice_total, 1);
+
+ /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
+ *
+ * "Scratch Space per slice is computed based on 4 sub-slices. SW must
+ * allocate scratch space enough so that each slice has 4 slices
+ * allowed."
+ *
+ * According to the other driver team, this applies to compute shaders
+ * as well. This is not currently documented at all.
+ *
+ * brw->screen->subslice_total is the TOTAL number of subslices
+ * and we wish to view that there are 4 subslices per slice
+ * instead of the actual number of subslices per slice.
+ *
+ * For ICL, scratch space allocation is based on the number of threads
+ * in the base configuration.
+ */
+ if (devinfo->gen == 11)
+ subslices = 8;
+ else if (devinfo->gen >= 9 && devinfo->gen < 11)
+ subslices = 4 * brw->screen->devinfo.num_slices;
+
+ unsigned scratch_ids_per_subslice;
+ if (devinfo->gen >= 11) {
+ /* The MEDIA_VFE_STATE docs say:
+ *
+ * "Starting with this configuration, the Maximum Number of
+ * Threads must be set to (#EU * 8) for GPGPU dispatches.
+ *
+ * Although there are only 7 threads per EU in the configuration,
+ * the FFTID is calculated as if there are 8 threads per EU,
+ * which in turn requires a larger amount of Scratch Space to be
+ * allocated by the driver."
+ */
+ scratch_ids_per_subslice = 8 * 8;
+ } else if (devinfo->is_haswell) {
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating
+ * which subslice, EU within a subslice, and thread within an EU it
+ * is. There's a maximum of two slices and two subslices, so these
+ * can be stored with a single bit. Even though there are only 10 EUs
+ * per subslice, this is stored in 4 bits, so there's an effective
+ * maximum value of 16 EUs. Similarly, although there are only 7
+ * threads per EU, this is stored in a 3 bit number, giving an
+ * effective maximum value of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ scratch_ids_per_subslice = 16 * 8;
+ } else if (devinfo->is_cherryview) {
+ /* Cherryview devices have either 6 or 8 EUs per subslice, and each
+ * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
+ * as if it had 8 EUs.
+ */
+ scratch_ids_per_subslice = 8 * 7;
+ } else {
+ scratch_ids_per_subslice = devinfo->max_cs_threads;
+ }
+
+ thread_count = scratch_ids_per_subslice * subslices;
+ break;
}
+ default:
+ unreachable("Unsupported stage!");
+ }
+
+ stage_state->scratch_bo =
+ brw_bo_alloc(brw->bufmgr, "shader scratch space",
+ per_thread_size * thread_count, BRW_MEMZONE_SCRATCH);
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
functions->LinkShader = brw_link_shader;
functions->MemoryBarrier = brw_memory_barrier;
- functions->BlendBarrier = brw_blend_barrier;
+ functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
}
struct shader_times {
{
const int max_entries = 2048;
brw->shader_time.bo =
- drm_intel_bo_alloc(brw->bufmgr, "shader time",
- max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
+ brw_bo_alloc(brw->bufmgr, "shader time",
+ max_entries * BRW_SHADER_TIME_STRIDE * 3,
+ BRW_MEMZONE_OTHER);
brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
case ST_GS:
case ST_FS8:
case ST_FS16:
+ case ST_FS32:
case ST_CS:
written = brw->shader_time.cumulative[i].written;
reset = brw->shader_time.cumulative[i].reset;
case ST_GS:
case ST_FS8:
case ST_FS16:
+ case ST_FS32:
case ST_CS:
total_by_type[type] += scaled[i];
break;
case ST_FS16:
stage = "fs16";
break;
+ case ST_FS32:
+ stage = "fs32";
+ break;
case ST_CS:
stage = "cs";
break;
print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
+ print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
}
* delaying reading the reports, but it doesn't look like it's a big
* overhead compared to the cost of tracking the time in the first place.
*/
- drm_intel_bo_map(brw->shader_time.bo, true);
- void *bo_map = brw->shader_time.bo->virtual;
+ void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
for (int i = 0; i < brw->shader_time.num_entries; i++) {
uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
/* Zero the BO out to clear it out for our next collection.
*/
memset(bo_map, 0, brw->shader_time.bo->size);
- drm_intel_bo_unmap(brw->shader_time.bo);
+ brw_bo_unmap(brw->shader_time.bo);
}
void
void
brw_destroy_shader_time(struct brw_context *brw)
{
- drm_intel_bo_unreference(brw->shader_time.bo);
+ brw_bo_unreference(brw->shader_time.bo);
brw->shader_time.bo = NULL;
}
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
- ralloc_free(prog_data->image_param);
}
void
}
void
-brw_setup_tex_for_precompile(struct brw_context *brw,
+brw_setup_tex_for_precompile(const struct gen_device_info *devinfo,
struct brw_sampler_prog_key_data *tex,
- struct gl_program *prog)
+ const struct gl_program *prog)
{
- const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+ const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
unsigned sampler_count = util_last_bit(prog->SamplersUsed);
for (unsigned i = 0; i < sampler_count; i++) {
if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
}
- if (prog->info.num_ssbos) {
+ if (prog->info.num_ssbos || prog->info.num_abos) {
+ assert(prog->info.num_abos <= BRW_MAX_ABO);
assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_ssbos;
+ next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
} else {
stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
}
stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
}
- if (prog->nir->info->uses_texture_gather) {
+ if (prog->info.uses_texture_gather) {
if (devinfo->gen >= 8) {
stage_prog_data->binding_table.gather_texture_start =
stage_prog_data->binding_table.texture_start;
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
- if (prog->info.num_abos) {
- stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_abos;
- } else {
- stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
- }
-
if (prog->info.num_images) {
stage_prog_data->binding_table.image_start = next_binding_table_offset;
next_binding_table_offset += prog->info.num_images;
stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
next_binding_table_offset += num_textures;
- /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
+ /* Set the binding table size. Some callers may append new entries
+ * and increase this accordingly.
+ */
+ stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
assert(next_binding_table_offset <= BRW_MAX_SURFACES);
return next_binding_table_offset;
}
+
+/**
+ * Fill in the default program key for prog by dispatching on
+ * prog->info.stage to the matching per-stage helper.
+ *
+ * Only the two tessellation helpers are passed sh_prog; the other stages
+ * are called without it.
+ */
+void
+brw_populate_default_key(const struct brw_compiler *compiler,
+ union brw_any_prog_key *prog_key,
+ struct gl_shader_program *sh_prog,
+ struct gl_program *prog)
+{
+ /* Nothing follows the switch, so each case returns directly. */
+ switch (prog->info.stage) {
+ case MESA_SHADER_VERTEX:
+ brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
+ return;
+
+ case MESA_SHADER_TESS_CTRL:
+ brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
+ return;
+
+ case MESA_SHADER_TESS_EVAL:
+ brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
+ return;
+
+ case MESA_SHADER_GEOMETRY:
+ brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
+ return;
+
+ case MESA_SHADER_FRAGMENT:
+ brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
+ return;
+
+ case MESA_SHADER_COMPUTE:
+ brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
+ return;
+
+ default:
+ unreachable("Unsupported stage!");
+ }
+}
+
+/**
+ * Log that a shader of the given stage is being recompiled, then look up
+ * the previous compile for key->program_string_id and hand both keys to
+ * brw_debug_key_recompile().
+ */
+void
+brw_debug_recompile(struct brw_context *brw,
+ gl_shader_stage stage,
+ unsigned api_id,
+ struct brw_base_prog_key *key)
+{
+ const struct brw_compiler *const comp = brw->screen->compiler;
+ const enum brw_cache_id stage_cache = brw_stage_cache_id(stage);
+
+ comp->shader_perf_log(brw, "Recompiling %s shader for program %d\n",
+ _mesa_shader_stage_to_string(stage), api_id);
+
+ /* Key used by the earlier compile of this same program. */
+ const void *prev_key = brw_find_previous_compile(&brw->cache, stage_cache,
+ key->program_string_id);
+
+ brw_debug_key_recompile(comp, brw, stage, prev_key, key);
+}