*/
#include <pthread.h>
-#include "main/imports.h"
#include "main/glspirv.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
}
}
-static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+ gl_shader_stage stage,
GLuint id, bool is_arb_asm);
nir_shader *
nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
} else {
nir = glsl_to_nir(ctx, shader_prog, stage, options);
+
+ /* Remap the locations to slots so those requiring two slots will
+ * occupy two locations. For instance, if we have in the IR code a
+ * dvec3 attr0 in location 0 and vec4 attr1 in location 1, in NIR attr0
+ * will use locations/slots 0 and 1, and attr1 will use location/slot 2
+ */
+ if (nir->info.stage == MESA_SHADER_VERTEX)
+ nir_remap_dual_slot_attributes(nir, &prog->DualSlotInputs);
}
assert (nir);
- nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
+ nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out,
+ NULL);
nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir");
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
nir_shader_get_entrypoint(nir), true, false);
} else {
nir = prog_to_nir(prog, options);
NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
- NIR_PASS_V(nir, gl_nir_lower_samplers, NULL);
}
nir_validate_shader(nir, "before brw_preprocess_nir");
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
- nir_shader *softfp64 = NULL;
- if ((options->lower_doubles_options & nir_lower_fp64_full_software) &&
- nir->info.uses_64bit) {
- softfp64 = glsl_float64_funcs_to_nir(ctx, options);
- ralloc_steal(ralloc_parent(nir), softfp64);
+ if (!ctx->SoftFP64 && nir->info.uses_64bit &&
+ (options->lower_doubles_options & nir_lower_fp64_full_software)) {
+ ctx->SoftFP64 = glsl_float64_funcs_to_nir(ctx, options);
}
- nir = brw_preprocess_nir(brw->screen->compiler, nir, softfp64);
-
- NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
-
- NIR_PASS_V(nir, gl_nir_lower_buffers, shader_prog);
- /* Do a round of constant folding to clean up address calculations */
- NIR_PASS_V(nir, nir_opt_constant_folding);
+ brw_preprocess_nir(brw->screen->compiler, nir, ctx->SoftFP64);
if (stage == MESA_SHADER_TESS_CTRL) {
/* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gen8+. */
}
}
- NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
-
return nir;
}
+/* Size/align callback for nir_lower_vars_to_explicit_types on
+ * nir_var_mem_shared (see brw_nir_lower_resources below).  Reports the
+ * byte size and alignment of a vector or scalar GLSL type: booleans are
+ * counted as 4 bytes, and a 3-component vector is aligned as if it had
+ * 4 components.
+ */
+static void
+shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
+{
+ assert(glsl_type_is_vector_or_scalar(type));
+
+ uint32_t comp_size = glsl_type_is_boolean(type)
+ ? 4 : glsl_get_bit_size(type) / 8;
+ unsigned length = glsl_get_vector_elements(type);
+ /* NOTE(review): trailing comma is the comma operator, so this still
+ * performs both stores; a ';' would read more clearly. */
+ *size = comp_size * length,
+ *align = comp_size * (length == 3 ? 4 : length);
+}
+
+/* Lower GL resource access in a shader's NIR: uniforms, samplers
+ * (recording textures_used / textures_used_by_txf into prog->info),
+ * image load/store, and UBO/SSBO buffers, followed by a constant-folding
+ * pass to clean up the address math.  For SPIR-V compute shaders, shared
+ * memory variables are also given explicit offsets and lowered to
+ * 32-bit-offset explicit I/O.
+ *
+ * NOTE(review): the first pass uses the 'nir' parameter while the rest
+ * use prog->nir — presumably callers always pass prog->nir here (the
+ * visible call sites do); confirm before relying on them differing.
+ * NOTE(review): shader_prog may be NULL (ARB-program call sites pass
+ * NULL); the compute branch dereferences shader_prog->data, which is
+ * only safe because those call sites are not compute stages.
+ */
+void
+brw_nir_lower_resources(nir_shader *nir, struct gl_shader_program *shader_prog,
+ struct gl_program *prog,
+ const struct gen_device_info *devinfo)
+{
+ NIR_PASS_V(nir, brw_nir_lower_uniforms, nir->options->lower_to_scalar);
+ NIR_PASS_V(prog->nir, gl_nir_lower_samplers, shader_prog);
+ prog->info.textures_used = prog->nir->info.textures_used;
+ prog->info.textures_used_by_txf = prog->nir->info.textures_used_by_txf;
+
+ NIR_PASS_V(prog->nir, brw_nir_lower_image_load_store, devinfo, NULL);
+
+ if (prog->nir->info.stage == MESA_SHADER_COMPUTE &&
+ shader_prog->data->spirv) {
+ NIR_PASS_V(prog->nir, nir_lower_vars_to_explicit_types,
+ nir_var_mem_shared, shared_type_info);
+ NIR_PASS_V(prog->nir, nir_lower_explicit_io,
+ nir_var_mem_shared, nir_address_format_32bit_offset);
+ }
+
+ NIR_PASS_V(prog->nir, gl_nir_lower_buffers, shader_prog);
+ /* Do a round of constant folding to clean up address calculations */
+ NIR_PASS_V(prog->nir, nir_opt_constant_folding);
+}
+
void
brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
{
return p_atomic_inc_return(&screen->program_id);
}
-static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+static struct gl_program *brwNewProgram(struct gl_context *ctx,
+ gl_shader_stage stage,
GLuint id, bool is_arb_asm)
{
struct brw_context *brw = brw_context(ctx);
if (prog) {
prog->id = get_new_program_id(brw->screen);
- return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
+ return _mesa_init_gl_program(&prog->program, stage, id, is_arb_asm);
}
return NULL;
if (newFP == curFP)
brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
+ _mesa_program_fragment_position_to_sysval(&newFP->program);
newFP->id = get_new_program_id(brw->screen);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
+ brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
brw_shader_gather_info(prog->nir, prog);
brw_fs_precompile(ctx, prog);
prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
compiler->scalar_stage[MESA_SHADER_VERTEX]);
+ brw_nir_lower_resources(prog->nir, NULL, prog, &brw->screen->devinfo);
+
brw_shader_gather_info(prog->nir, prog);
brw_vs_precompile(ctx, prog);
* brw->screen->subslice_total is the TOTAL number of subslices
* and we wish to view that there are 4 subslices per slice
* instead of the actual number of subslices per slice.
+ *
+ * For ICL, scratch space allocation is based on the number of threads
+ * in the base configuration.
*/
- if (devinfo->gen >= 9 && devinfo->gen < 11)
+ if (devinfo->gen == 11)
+ subslices = 8;
+ else if (devinfo->gen >= 9 && devinfo->gen < 11)
subslices = 4 * brw->screen->devinfo.num_slices;
unsigned scratch_ids_per_subslice;
- if (devinfo->is_haswell) {
+ if (devinfo->gen >= 11) {
+ /* The MEDIA_VFE_STATE docs say:
+ *
+ * "Starting with this configuration, the Maximum Number of
+ * Threads must be set to (#EU * 8) for GPGPU dispatches.
+ *
+ * Although there are only 7 threads per EU in the configuration,
+ * the FFTID is calculated as if there are 8 threads per EU,
+ * which in turn requires a larger amount of Scratch Space to be
+ * allocated by the driver."
+ */
+ scratch_ids_per_subslice = 8 * 8;
+ } else if (devinfo->is_haswell) {
/* WaCSScratchSize:hsw
*
* Haswell's scratch space address calculation appears to be sparse
void
brw_setup_tex_for_precompile(const struct gen_device_info *devinfo,
struct brw_sampler_prog_key_data *tex,
- struct gl_program *prog)
+ const struct gl_program *prog)
{
const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
unsigned sampler_count = util_last_bit(prog->SamplersUsed);
}
void
-brw_prog_key_set_id(union brw_any_prog_key *key, gl_shader_stage stage,
- unsigned id)
-{
- static const unsigned stage_offsets[] = {
- offsetof(struct brw_vs_prog_key, program_string_id),
- offsetof(struct brw_tcs_prog_key, program_string_id),
- offsetof(struct brw_tes_prog_key, program_string_id),
- offsetof(struct brw_gs_prog_key, program_string_id),
- offsetof(struct brw_wm_prog_key, program_string_id),
- offsetof(struct brw_cs_prog_key, program_string_id),
- };
- assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_offsets));
- *(unsigned*)((uint8_t*)key + stage_offsets[stage]) = id;
-}
-
-void
-brw_populate_default_key(const struct gen_device_info *devinfo,
+brw_populate_default_key(const struct brw_compiler *compiler,
union brw_any_prog_key *prog_key,
struct gl_shader_program *sh_prog,
struct gl_program *prog)
{
switch (prog->info.stage) {
case MESA_SHADER_VERTEX:
- brw_vs_populate_default_key(devinfo, &prog_key->vs, prog);
+ brw_vs_populate_default_key(compiler, &prog_key->vs, prog);
break;
case MESA_SHADER_TESS_CTRL:
- brw_tcs_populate_default_key(devinfo, &prog_key->tcs, sh_prog, prog);
+ brw_tcs_populate_default_key(compiler, &prog_key->tcs, sh_prog, prog);
break;
case MESA_SHADER_TESS_EVAL:
- brw_tes_populate_default_key(devinfo, &prog_key->tes, sh_prog, prog);
+ brw_tes_populate_default_key(compiler, &prog_key->tes, sh_prog, prog);
break;
case MESA_SHADER_GEOMETRY:
- brw_gs_populate_default_key(devinfo, &prog_key->gs, prog);
+ brw_gs_populate_default_key(compiler, &prog_key->gs, prog);
break;
case MESA_SHADER_FRAGMENT:
- brw_wm_populate_default_key(devinfo, &prog_key->wm, prog);
+ brw_wm_populate_default_key(compiler, &prog_key->wm, prog);
break;
case MESA_SHADER_COMPUTE:
- brw_cs_populate_default_key(devinfo, &prog_key->cs, prog);
+ brw_cs_populate_default_key(compiler, &prog_key->cs, prog);
break;
default:
unreachable("Unsupported stage!");
brw_debug_recompile(struct brw_context *brw,
gl_shader_stage stage,
unsigned api_id,
- unsigned key_program_string_id,
- void *key)
+ struct brw_base_prog_key *key)
{
const struct brw_compiler *compiler = brw->screen->compiler;
enum brw_cache_id cache_id = brw_stage_cache_id(stage);
_mesa_shader_stage_to_string(stage), api_id);
const void *old_key =
- brw_find_previous_compile(&brw->cache, cache_id, key_program_string_id);
+ brw_find_previous_compile(&brw->cache, cache_id, key->program_string_id);
brw_debug_key_recompile(compiler, brw, stage, old_key, key);
}