memset(&prog_data, 0, sizeof(prog_data));
+ if (prog->Comp.SharedSize > 64 * 1024) {
+ prog->LinkStatus = false;
+ const char *error_str =
+ "Compute shader used more than 64KB of shared variables";
+ ralloc_strcat(&prog->InfoLog, error_str);
+ _mesa_problem(NULL, "Failed to link compute shader: %s\n", error_str);
+
+ ralloc_free(mem_ctx);
+ return false;
+ } else {
+ prog_data.base.total_shared = prog->Comp.SharedSize;
+ }
+
assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog,
&cp->program.Base, &prog_data);
/* GEN7 DW5, GEN8+ DW6 */
# define MEDIA_BARRIER_ENABLE_SHIFT 21
# define MEDIA_BARRIER_ENABLE_MASK INTEL_MASK(21, 21)
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_SHIFT 16
+# define MEDIA_SHARED_LOCAL_MEMORY_SIZE_MASK INTEL_MASK(20, 16)
# define MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
assert(threads <= brw->max_cs_threads);
+
+ assert(prog_data->total_shared <= 64 * 1024);
+ uint32_t slm_size = 0;
+ if (prog_data->total_shared > 0) {
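+ /* The value programmed into the descriptor is in units of 4KB, and the
+  * size it represents must be a power of two. Round the byte count up to
+  * a power of two (minimum 4KB), then convert to 4KB units.
+  */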
+ slm_size = 4 * 1024;
+ while (slm_size < prog_data->total_shared)
+ slm_size <<= 1;
+ slm_size /= 4 * 1024;
+ }
+
desc[dw++] =
SET_FIELD(cs_prog_data->uses_barrier, MEDIA_BARRIER_ENABLE) |
+ SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |
media_threads;
BEGIN_BATCH(4);