i965/cs: Rework cs_emit to take a nir_shader and a brw_compiler
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
index 1354a2668d50dac6bf626a32efca78f38dcf4d75..ce130dffad6319f96a02a918c817a76725326461 100644 (file)
@@ -47,7 +47,7 @@
 #include "brw_dead_control_flow.h"
 #include "main/uniforms.h"
 #include "brw_fs_live_variables.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
 #include "program/sampler.h"
 
 using namespace brw;
@@ -210,7 +210,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
    inst->regs_written = regs_written;
 
    if (devinfo->gen < 7) {
-      inst->base_mrf = 13;
+      inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
       inst->header_size = 1;
       if (devinfo->gen == 4)
          inst->mlen = 3;
@@ -1048,11 +1048,11 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
    unsigned int array_elements;
 
    if (type->is_array()) {
-      array_elements = type->length;
+      array_elements = type->arrays_of_arrays_size();
       if (array_elements == 0) {
          fail("dereferenced array '%s' has length 0\n", name);
       }
-      type = type->fields.array;
+      type = type->without_array();
    } else {
       array_elements = 1;
    }
@@ -1401,7 +1401,7 @@ fs_visitor::calculate_urb_setup()
    int urb_next = 0;
    /* Figure out where each of the incoming setup attributes lands. */
    if (devinfo->gen >= 6) {
-      if (_mesa_bitcount_64(prog->InputsRead &
+      if (_mesa_bitcount_64(nir->info.inputs_read &
                             BRW_FS_VARYING_INPUT_MASK) <= 16) {
          /* The SF/SBE pipeline stage can do arbitrary rearrangement of the
           * first 16 varying inputs, so we can put them wherever we want.
@@ -1413,7 +1413,7 @@ fs_visitor::calculate_urb_setup()
           * a different vertex (or geometry) shader.
           */
          for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
-            if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+            if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
                 BITFIELD64_BIT(i)) {
                prog_data->urb_setup[i] = urb_next++;
             }
@@ -1427,7 +1427,7 @@ fs_visitor::calculate_urb_setup()
          struct brw_vue_map prev_stage_vue_map;
          brw_compute_vue_map(devinfo, &prev_stage_vue_map,
                              key->input_slots_valid,
-                             shader_prog->SeparateShader);
+                             nir->info.separate_shader);
          int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
          assert(prev_stage_vue_map.num_slots <= first_slot + 32);
          for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
@@ -1437,7 +1437,7 @@ fs_visitor::calculate_urb_setup()
              * unused.
              */
             if (varying != BRW_VARYING_SLOT_COUNT &&
-                (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+                (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
                  BITFIELD64_BIT(varying))) {
                prog_data->urb_setup[varying] = slot - first_slot;
             }
@@ -1470,7 +1470,7 @@ fs_visitor::calculate_urb_setup()
        *
        * See compile_sf_prog() for more info.
        */
-      if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
+      if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
          prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
    }
 
@@ -1508,21 +1508,14 @@ void
 fs_visitor::assign_vs_urb_setup()
 {
    brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
-   int grf, count, slot, channel, attr;
 
    assert(stage == MESA_SHADER_VERTEX);
-   count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+   int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
    if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
       count++;
 
    /* Each attribute is 4 regs. */
-   this->first_non_payload_grf += count * 4;
-
-   unsigned vue_entries =
-      MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
-   vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
-   vs_prog_data->base.urb_read_length = (count + 1) / 2;
+   this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
 
    assert(vs_prog_data->base.urb_read_length <= 15);
 
@@ -1530,25 +1523,10 @@ fs_visitor::assign_vs_urb_setup()
    foreach_block_and_inst(block, fs_inst, inst, cfg) {
       for (int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == ATTR) {
-
-            if (inst->src[i].reg == VERT_ATTRIB_MAX) {
-               slot = count - 1;
-            } else {
-               /* Attributes come in in a contiguous block, ordered by their
-                * gl_vert_attrib value.  That means we can compute the slot
-                * number for an attribute by masking out the enabled
-                * attributes before it and counting the bits.
-                */
-               attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
-               slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
-                                        BITFIELD64_MASK(attr));
-            }
-
-            channel = inst->src[i].reg_offset & 3;
-
-            grf = payload.num_regs +
-               prog_data->curb_read_length +
-               slot * 4 + channel;
+            int grf = payload.num_regs +
+                      prog_data->curb_read_length +
+                      inst->src[i].reg +
+                      inst->src[i].reg_offset;
 
             inst->src[i].file = HW_REG;
             inst->src[i].fixed_hw_reg =
@@ -2678,7 +2656,7 @@ fs_visitor::emit_repclear_shader()
 bool
 fs_visitor::remove_duplicate_mrf_writes()
 {
-   fs_inst *last_mrf_move[16];
+   fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)];
    bool progress = false;
 
    /* Need to update the MRF tracking for compressed instructions. */
@@ -2999,7 +2977,7 @@ fs_visitor::lower_uniform_pull_constant_loads()
           * else does except for register spill/unspill, which generates and
           * uses its MRF within a single IR instruction.
           */
-         inst->base_mrf = 14;
+         inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
          inst->mlen = 1;
       }
    }
@@ -4634,7 +4612,7 @@ void
 fs_visitor::setup_payload_gen6()
 {
    bool uses_depth =
-      (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+      (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
    unsigned barycentric_interp_modes =
       (stage == MESA_SHADER_FRAGMENT) ?
       ((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
@@ -4693,7 +4671,7 @@ fs_visitor::setup_payload_gen6()
    }
 
    /* R32: MSAA input coverage mask */
-   if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+   if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
       assert(devinfo->gen >= 7);
       payload.sample_mask_in_reg = payload.num_regs;
       payload.num_regs++;
@@ -4706,7 +4684,7 @@ fs_visitor::setup_payload_gen6()
    /* R34-: bary for 32-pixel. */
    /* R58-59: interp W for 32-pixel. */
 
-   if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+   if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
       source_depth_to_render_target = true;
    }
 }
@@ -4718,20 +4696,43 @@ fs_visitor::setup_vs_payload()
    payload.num_regs = 2;
 }
 
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ *    no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ *    easily use 16-bit words rather than 32-bit dwords in the push constant
+ *    data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ *    conveying the data, and thereby reduce push constant usage.
+ *
+ */
 void
 fs_visitor::setup_cs_payload()
 {
    assert(devinfo->gen >= 7);
+   brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
 
    payload.num_regs = 1;
 
-   if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
-      const unsigned local_id_dwords =
-         brw_cs_prog_local_id_payload_dwords(dispatch_width);
-      assert((local_id_dwords & 0x7) == 0);
-      const unsigned local_id_regs = local_id_dwords / 8;
+   if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+      prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
       payload.local_invocation_id_reg = payload.num_regs;
-      payload.num_regs += local_id_regs;
+      payload.num_regs += prog_data->local_invocation_id_regs;
    }
 }
 
@@ -4786,8 +4787,8 @@ fs_visitor::optimize()
                                                                         \
       if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) {   \
          char filename[64];                                             \
-         snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass,              \
-                  stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+         snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass,              \
+                  stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
                                                                         \
          backend_shader::dump_instructions(filename);                   \
       }                                                                 \
@@ -4800,9 +4801,8 @@ fs_visitor::optimize()
 
    if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
       char filename[64];
-      snprintf(filename, 64, "%s%d-%04d-00-start",
-               stage_abbrev, dispatch_width,
-               shader_prog ? shader_prog->Name : 0);
+      snprintf(filename, 64, "%s%d-%s-00-start",
+               stage_abbrev, dispatch_width, nir->info.name);
 
       backend_shader::dump_instructions(filename);
    }
@@ -4824,7 +4824,7 @@ fs_visitor::optimize()
       OPT(opt_algebraic);
       OPT(opt_cse);
       OPT(opt_copy_propagate);
-      OPT(opt_peephole_predicated_break);
+      OPT(opt_predicated_break, this);
       OPT(opt_cmod_propagation);
       OPT(dead_code_eliminate);
       OPT(opt_peephole_sel);
@@ -4986,8 +4986,6 @@ fs_visitor::run_fs(bool do_rep_send)
 
    assert(stage == MESA_SHADER_FRAGMENT);
 
-   sanity_param_count = prog->Parameters->NumParameters;
-
    if (devinfo->gen >= 6)
       setup_payload_gen6();
    else
@@ -5003,7 +5001,7 @@ fs_visitor::run_fs(bool do_rep_send)
          emit_shader_time_begin();
 
       calculate_urb_setup();
-      if (prog->InputsRead > 0) {
+      if (nir->info.inputs_read > 0) {
          if (devinfo->gen < 6)
             emit_interpolation_setup_gen4();
          else
@@ -5056,13 +5054,6 @@ fs_visitor::run_fs(bool do_rep_send)
    else
       wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
 
-   /* If any state parameters were appended, then ParameterValues could have
-    * been realloced, in which case the driver uniform storage set up by
-    * _mesa_associate_uniform_storage() would point to freed memory.  Make
-    * sure that didn't happen.
-    */
-   assert(sanity_param_count == prog->Parameters->NumParameters);
-
    return !failed;
 }
 
@@ -5070,9 +5061,6 @@ bool
 fs_visitor::run_cs()
 {
    assert(stage == MESA_SHADER_COMPUTE);
-   assert(shader);
-
-   sanity_param_count = prog->Parameters->NumParameters;
 
    setup_cs_payload();
 
@@ -5101,65 +5089,43 @@ fs_visitor::run_cs()
    if (failed)
       return false;
 
-   /* If any state parameters were appended, then ParameterValues could have
-    * been realloced, in which case the driver uniform storage set up by
-    * _mesa_associate_uniform_storage() would point to freed memory.  Make
-    * sure that didn't happen.
-    */
-   assert(sanity_param_count == prog->Parameters->NumParameters);
-
    return !failed;
 }
 
 const unsigned *
-brw_wm_fs_emit(struct brw_context *brw,
+brw_wm_fs_emit(const struct brw_compiler *compiler, void *log_data,
                void *mem_ctx,
                const struct brw_wm_prog_key *key,
                struct brw_wm_prog_data *prog_data,
-               struct gl_fragment_program *fp,
-               struct gl_shader_program *prog,
-               unsigned *final_assembly_size)
+               const nir_shader *shader,
+               struct gl_program *prog,
+               int shader_time_index8, int shader_time_index16,
+               bool use_rep_send,
+               unsigned *final_assembly_size,
+               char **error_str)
 {
-   struct brw_shader *shader = NULL;
-   if (prog)
-      shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
-   if (unlikely(INTEL_DEBUG & DEBUG_WM))
-      brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
-
-   int st_index8 = -1, st_index16 = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
-      st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
-      st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
-   }
-
-   /* Now the main event: Visit the shader IR and generate our FS IR for it.
-    */
-   fs_visitor v(brw->intelScreen->compiler, brw,
-                mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                prog, &fp->Base, 8, st_index8);
+   fs_visitor v(compiler, log_data, mem_ctx, key,
+                &prog_data->base, prog, shader, 8,
+                shader_time_index8);
    if (!v.run_fs(false /* do_rep_send */)) {
-      if (prog) {
-         prog->LinkStatus = false;
-         ralloc_strcat(&prog->InfoLog, v.fail_msg);
-      }
-
-      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
-                    v.fail_msg);
+      if (error_str)
+         *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
 
       return NULL;
    }
 
    cfg_t *simd16_cfg = NULL;
-   fs_visitor v2(brw->intelScreen->compiler, brw,
-                 mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
-                 prog, &fp->Base, 16, st_index16);
-   if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
+   fs_visitor v2(compiler, log_data, mem_ctx, key,
+                 &prog_data->base, prog, shader, 16,
+                 shader_time_index16);
+   if (likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
       if (!v.simd16_unsupported) {
          /* Try a SIMD16 compile */
          v2.import_uniforms(&v);
-         if (!v2.run_fs(brw->use_rep_send)) {
-            perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
+         if (!v2.run_fs(use_rep_send)) {
+            compiler->shader_perf_log(log_data,
+                                      "SIMD16 shader failed to compile: %s",
+                                      v2.fail_msg);
          } else {
             simd16_cfg = v2.cfg;
          }
@@ -5167,8 +5133,8 @@ brw_wm_fs_emit(struct brw_context *brw,
    }
 
    cfg_t *simd8_cfg;
-   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
-   if ((no_simd8 || brw->gen < 5) && simd16_cfg) {
+   int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
+   if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
       simd8_cfg = NULL;
       prog_data->no_8 = true;
    } else {
@@ -5176,20 +5142,14 @@ brw_wm_fs_emit(struct brw_context *brw,
       prog_data->no_8 = false;
    }
 
-   fs_generator g(brw->intelScreen->compiler, brw,
-                  mem_ctx, (void *) key, &prog_data->base,
-                  &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
+   fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
+                  v.promoted_constants, v.runtime_check_aads_emit, "FS");
 
    if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
-      char *name;
-      if (prog)
-         name = ralloc_asprintf(mem_ctx, "%s fragment shader %d",
-                                prog->Label ? prog->Label : "unnamed",
-                                prog->Name);
-      else
-         name = ralloc_asprintf(mem_ctx, "fragment program %d", fp->Base.Id);
-
-      g.enable_debug(name);
+      g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
+                                     shader->info.label ? shader->info.label :
+                                                          "unnamed",
+                                     shader->info.name));
    }
 
    if (simd8_cfg)
@@ -5200,6 +5160,42 @@ brw_wm_fs_emit(struct brw_context *brw,
    return g.get_assembly(final_assembly_size);
 }
 
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+                             void *buffer, uint32_t threads, uint32_t stride)
+{
+   if (prog_data->local_invocation_id_regs == 0)
+      return;
+
+   /* 'stride' should be an integer number of registers, that is, a multiple
+    * of 32 bytes.
+    */
+   assert(stride % 32 == 0);
+
+   unsigned x = 0, y = 0, z = 0;
+   for (unsigned t = 0; t < threads; t++) {
+      uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+      for (unsigned i = 0; i < prog_data->simd_size; i++) {
+         param[0 * prog_data->simd_size + i] = x;
+         param[1 * prog_data->simd_size + i] = y;
+         param[2 * prog_data->simd_size + i] = z;
+
+         x++;
+         if (x == prog_data->local_size[0]) {
+            x = 0;
+            y++;
+            if (y == prog_data->local_size[1]) {
+               y = 0;
+               z++;
+               if (z == prog_data->local_size[2])
+                  z = 0;
+            }
+         }
+      }
+   }
+}
+
 fs_reg *
 fs_visitor::emit_cs_local_invocation_id_setup()
 {
@@ -5238,55 +5234,51 @@ fs_visitor::emit_cs_work_group_id_setup()
 }
 
 const unsigned *
-brw_cs_emit(struct brw_context *brw,
+brw_cs_emit(const struct brw_compiler *compiler, void *log_data,
             void *mem_ctx,
             const struct brw_cs_prog_key *key,
             struct brw_cs_prog_data *prog_data,
-            struct gl_compute_program *cp,
-            struct gl_shader_program *prog,
-            unsigned *final_assembly_size)
+            const nir_shader *shader,
+            int shader_time_index,
+            unsigned *final_assembly_size,
+            char **error_str)
 {
-   struct brw_shader *shader =
-      (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
-   if (unlikely(INTEL_DEBUG & DEBUG_CS))
-      brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
-   prog_data->local_size[0] = cp->LocalSize[0];
-   prog_data->local_size[1] = cp->LocalSize[1];
-   prog_data->local_size[2] = cp->LocalSize[2];
+   prog_data->local_size[0] = shader->info.cs.local_size[0];
+   prog_data->local_size[1] = shader->info.cs.local_size[1];
+   prog_data->local_size[2] = shader->info.cs.local_size[2];
    unsigned local_workgroup_size =
-      cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+      shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
+      shader->info.cs.local_size[2];
+
+   unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
 
    cfg_t *cfg = NULL;
    const char *fail_msg = NULL;
 
-   int st_index = -1;
-   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-      st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
    /* Now the main event: Visit the shader IR and generate our CS IR for it.
     */
-   fs_visitor v8(brw->intelScreen->compiler, brw,
-                 mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                 &cp->Base, 8, st_index);
+   fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
+                 NULL, /* Never used in core profile */
+                 shader, 8, shader_time_index);
    if (!v8.run_cs()) {
       fail_msg = v8.fail_msg;
-   } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+   } else if (local_workgroup_size <= 8 * max_cs_threads) {
       cfg = v8.cfg;
       prog_data->simd_size = 8;
    }
 
-   fs_visitor v16(brw->intelScreen->compiler, brw,
-                  mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
-                  &cp->Base, 16, st_index);
+   fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
+                 NULL, /* Never used in core profile */
+                 shader, 16, shader_time_index);
    if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
        !fail_msg && !v8.simd16_unsupported &&
-       local_workgroup_size <= 16 * brw->max_cs_threads) {
+       local_workgroup_size <= 16 * max_cs_threads) {
       /* Try a SIMD16 compile */
       v16.import_uniforms(&v8);
       if (!v16.run_cs()) {
-         perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+         compiler->shader_perf_log(log_data,
+                                   "SIMD16 shader failed to compile: %s",
+                                   v16.fail_msg);
          if (!cfg) {
             fail_msg =
                "Couldn't generate SIMD16 program and not "
@@ -5300,20 +5292,19 @@ brw_cs_emit(struct brw_context *brw,
 
    if (unlikely(cfg == NULL)) {
       assert(fail_msg);
-      prog->LinkStatus = false;
-      ralloc_strcat(&prog->InfoLog, fail_msg);
-      _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
-                    fail_msg);
+      if (error_str)
+         *error_str = ralloc_strdup(mem_ctx, fail_msg);
+
       return NULL;
    }
 
-   fs_generator g(brw->intelScreen->compiler, brw,
-                  mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+   fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
                   v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
    if (INTEL_DEBUG & DEBUG_CS) {
-      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
-                                   prog->Label ? prog->Label : "unnamed",
-                                   prog->Name);
+      char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
+                                   shader->info.label ? shader->info.label :
+                                                        "unnamed",
+                                   shader->info.name);
       g.enable_debug(name);
    }