#include "util/ralloc.h"
#include "compiler/glsl/ir.h"
#include "compiler/glsl/glsl_to_nir.h"
+#include "compiler/nir/nir_serialize.h"
#include "brw_program.h"
#include "brw_context.h"
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- bool progress;
nir_shader *nir;
/* First, lower the GLSL IR or Mesa IR to NIR */
}
nir_validate_shader(nir);
- (void)progress;
-
nir = brw_preprocess_nir(brw->screen->compiler, nir);
if (stage == MESA_SHADER_FRAGMENT) {
.fs_coord_pixel_center_integer = 1,
.fs_coord_origin_upper_left = 1,
};
- _mesa_add_state_reference(prog->Parameters,
- (gl_state_index *) wpos_options.state_tokens);
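+ /* Only record the WPOS transform state reference if the pass actually
+  * rewrote something; otherwise we would add an unused state parameter. */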
+ bool progress = false;
NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
+ if (progress) {
+ _mesa_add_state_reference(prog->Parameters,
+ (gl_state_index *) wpos_options.state_tokens);
+ }
}
- NIR_PASS(progress, nir, nir_lower_system_values);
NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
return nir;
void
brw_alloc_stage_scratch(struct brw_context *brw,
struct brw_stage_state *stage_state,
- unsigned per_thread_size,
- unsigned thread_count)
+ unsigned per_thread_size)
{
- if (stage_state->per_thread_scratch < per_thread_size) {
- stage_state->per_thread_scratch = per_thread_size;
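+ /* Scratch space only ever grows; if the existing BO already provides
+  * enough per-thread space, keep using it. */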
+ if (stage_state->per_thread_scratch >= per_thread_size)
+ return;
+
+ stage_state->per_thread_scratch = per_thread_size;
- if (stage_state->scratch_bo)
- brw_bo_unreference(stage_state->scratch_bo);
+ if (stage_state->scratch_bo)
+ brw_bo_unreference(stage_state->scratch_bo);
+
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ unsigned thread_count;
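+ /* Derive the maximum thread count for this stage from the hardware
+  * limits, so callers no longer need to pass it in. */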
+ switch (stage_state->stage) {
+ case MESA_SHADER_VERTEX:
+ thread_count = devinfo->max_vs_threads;
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ thread_count = devinfo->max_tcs_threads;
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ thread_count = devinfo->max_tes_threads;
+ break;
+ case MESA_SHADER_GEOMETRY:
+ thread_count = devinfo->max_gs_threads;
+ break;
+ case MESA_SHADER_FRAGMENT:
+ thread_count = devinfo->max_wm_threads;
+ break;
+ case MESA_SHADER_COMPUTE: {
+ unsigned subslices = MAX2(brw->screen->subslice_total, 1);
+
+ /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
+ *
+ * "Scratch Space per slice is computed based on 4 sub-slices. SW must
+ * allocate scratch space enough so that each slice has 4 slices
+ * allowed."
+ *
+ * According to the other driver team, this applies to compute shaders
+ * as well. This is not currently documented at all.
+ */
+ if (devinfo->gen >= 9)
+ subslices = 4;
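+ /* e.g. a part with fewer than 4 subslices still gets scratch for 4. */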
+
+ /* WaCSScratchSize:hsw
+ *
+ * Haswell's scratch space address calculation appears to be sparse
+ * rather than tightly packed. The Thread ID has bits indicating
+ * which subslice, EU within a subslice, and thread within an EU
+ * it is. There's a maximum of two subslices, so they could be stored
+ * with a single bit. Even though there are only 10 EUs
+ * per subslice, this is stored in 4 bits, so there's an effective
+ * maximum value of 16 EUs. Similarly, although there are only 7
+ * threads per EU, this is stored in a 3 bit number, giving an effective
+ * maximum value of 8 threads per EU.
+ *
+ * This means that we need to use 16 * 8 instead of 10 * 7 for the
+ * number of threads per subslice.
+ */
+ const unsigned scratch_ids_per_subslice =
+ devinfo->is_haswell ? 16 * 8 : devinfo->max_cs_threads;
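+ /* For Haswell that is 16 * 8 = 128 scratch IDs per subslice, even
+  * though at most 10 * 7 = 70 threads can actually be resident. */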
- stage_state->scratch_bo =
- brw_bo_alloc(brw->bufmgr, "shader scratch space",
- per_thread_size * thread_count, 4096);
+ thread_count = scratch_ids_per_subslice * subslices;
+ break;
}
+ default:
+ unreachable("Unsupported stage!");
+ }
+
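+ /* Size the BO so that every thread that could be resident gets its own
+  * per_thread_size slot; the hardware derives each thread's offset into
+  * the BO from its thread ID. */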
+ stage_state->scratch_bo =
+ brw_bo_alloc(brw->bufmgr, "shader scratch space",
+ per_thread_size * thread_count, 4096);
}
void brwInitFragProgFuncs( struct dd_function_table *functions )
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
- ralloc_free(prog_data->image_param);
}
void
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
}
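+ /* Atomic counter buffers now share the SSBO section of the binding
+  * table, so reserve one contiguous range for both. */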
- if (prog->info.num_ssbos) {
+ if (prog->info.num_ssbos || prog->info.num_abos) {
+ assert(prog->info.num_abos <= BRW_MAX_ABO);
assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_ssbos;
+ next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
} else {
stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
}
stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
}
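+ /* Use the shader info mirrored into the gl_program; prog->nir may not
+  * be resident here when the program came from the disk shader cache
+  * (see brw_program_deserialize_nir below). */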
- if (prog->nir->info.uses_texture_gather) {
+ if (prog->info.uses_texture_gather) {
if (devinfo->gen >= 8) {
stage_prog_data->binding_table.gather_texture_start =
stage_prog_data->binding_table.texture_start;
stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
}
- if (prog->info.num_abos) {
- stage_prog_data->binding_table.abo_start = next_binding_table_offset;
- next_binding_table_offset += prog->info.num_abos;
- } else {
- stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
- }
-
if (prog->info.num_images) {
stage_prog_data->binding_table.image_start = next_binding_table_offset;
next_binding_table_offset += prog->info.num_images;
assert(next_binding_table_offset <= BRW_MAX_SURFACES);
return next_binding_table_offset;
}
+
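+/* Recreate prog->nir from the blob saved by the disk shader cache, then
+ * free the blob, which is no longer needed once the NIR exists. */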
+void
+brw_program_deserialize_nir(struct gl_context *ctx, struct gl_program *prog,
+ gl_shader_stage stage)
+{
+ if (!prog->nir) {
+ assert(prog->driver_cache_blob && prog->driver_cache_blob_size > 0);
+ const struct nir_shader_compiler_options *options =
+ ctx->Const.ShaderCompilerOptions[stage].NirOptions;
+ struct blob_reader reader;
+ blob_reader_init(&reader, prog->driver_cache_blob,
+ prog->driver_cache_blob_size);
+ prog->nir = nir_deserialize(NULL, options, &reader);
+ }
+
+ if (prog->driver_cache_blob) {
+ ralloc_free(prog->driver_cache_blob);
+ prog->driver_cache_blob = NULL;
+ prog->driver_cache_blob_size = 0;
+ }
+}