nir: Embed the shader_info in the nir_shader again
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index ad728696a38447bddb9c589c17c23082b02725c7..d26dce07f9785399fccb819065978b0224e21635 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
 
 #include "brw_program.h"
 #include "brw_context.h"
-#include "brw_shader.h"
-#include "brw_nir.h"
+#include "compiler/brw_nir.h"
+#include "brw_defines.h"
 #include "intel_batchbuffer.h"
 
-static void
+static bool
 brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
 {
    if (is_scalar) {
-      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, 0,
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                                type_size_scalar_bytes);
-      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
+      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
    } else {
-      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, 0,
+      nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                                type_size_vec4_bytes);
-      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
+      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
    }
 }
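
Returning the nir_lower_io() result lets this helper behave like any other NIR pass, so a caller could track progress with NIR_PASS instead of NIR_PASS_V. A minimal usage sketch, not taken from this patch:

    bool progress = false;
    NIR_PASS(progress, nir, brw_nir_lower_uniforms, is_scalar);
    /* progress is true iff nir_lower_io() actually rewrote a uniform access */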
 
 nir_shader *
 brw_create_nir(struct brw_context *brw,
                const struct gl_shader_program *shader_prog,
-               const struct gl_program *prog,
+               struct gl_program *prog,
                gl_shader_stage stage,
                bool is_scalar)
 {
@@ -78,11 +78,13 @@ brw_create_nir(struct brw_context *brw,
    if (shader_prog) {
       nir = glsl_to_nir(shader_prog, stage, options);
       nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
+      nir_lower_returns(nir);
+      nir_validate_shader(nir);
       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                  nir_shader_get_entrypoint(nir), true, false);
    } else {
       nir = prog_to_nir(prog, options);
-      NIR_PASS_V(nir, nir_convert_to_ssa); /* turn registers into SSA */
+      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
    }
    nir_validate_shader(nir);
 
@@ -105,6 +107,15 @@ brw_create_nir(struct brw_context *brw,
    NIR_PASS(progress, nir, nir_lower_system_values);
    NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
 
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Copy the info we just generated back into the gl_program */
+   const char *prog_name = prog->info.name;
+   const char *prog_label = prog->info.label;
+   prog->info = nir->info;
+   prog->info.name = prog_name;
+   prog->info.label = prog_label;
+
    if (shader_prog) {
       NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
       NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
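
With the shader_info embedded in the nir_shader and mirrored into the gl_program as above, later code can query either copy, while the gl_program keeps its own name and label. A small call-site sketch (hypothetical, not in the patch):

    nir_shader *nir = brw_create_nir(brw, shader_prog, prog, stage, is_scalar);
    /* The gathered counts are now identical on both copies... */
    assert(prog->info.num_ubos == nir->info.num_ubos);
    assert(prog->info.num_images == nir->info.num_images);
    /* ...but prog->info.name / .label still come from the gl_program. */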
@@ -123,9 +134,8 @@ get_new_program_id(struct intel_screen *screen)
    return id;
 }
 
-static struct gl_program *brwNewProgram( struct gl_context *ctx,
-                                     GLenum target,
-                                     GLuint id )
+static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+                                        GLuint id, bool is_arb_asm)
 {
    struct brw_context *brw = brw_context(ctx);
 
@@ -133,47 +143,30 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
    case GL_VERTEX_PROGRAM_ARB:
    case GL_TESS_CONTROL_PROGRAM_NV:
    case GL_TESS_EVALUATION_PROGRAM_NV:
-   case GL_GEOMETRY_PROGRAM_NV: {
-      struct brw_program *prog = CALLOC_STRUCT(brw_program);
+   case GL_GEOMETRY_PROGRAM_NV:
+   case GL_COMPUTE_PROGRAM_NV: {
+      struct brw_program *prog = rzalloc(NULL, struct brw_program);
       if (prog) {
         prog->id = get_new_program_id(brw->screen);
 
-        return _mesa_init_gl_program(&prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
       }
       else
         return NULL;
    }
 
    case GL_FRAGMENT_PROGRAM_ARB: {
-      struct brw_program *prog;
-      if (brw->gen < 6) {
-         struct gen4_fragment_program *g4_prog =
-            CALLOC_STRUCT(gen4_fragment_program);
-         prog = &g4_prog->base;
-      } else {
-         prog = CALLOC_STRUCT(brw_program);
-      }
+      struct brw_program *prog = rzalloc(NULL, struct brw_program);
 
       if (prog) {
         prog->id = get_new_program_id(brw->screen);
 
-        return _mesa_init_gl_program(&prog->program, target, id);
+         return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
       }
       else
         return NULL;
    }
 
-   case GL_COMPUTE_PROGRAM_NV: {
-      struct brw_compute_program *prog = CALLOC_STRUCT(brw_compute_program);
-      if (prog) {
-         prog->id = get_new_program_id(brw->screen);
-
-         return _mesa_init_gl_program(&prog->program, target, id);
-      } else {
-         return NULL;
-      }
-   }
-
    default:
       unreachable("Unsupported target in brwNewProgram()");
    }
@@ -182,6 +175,49 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
 static void brwDeleteProgram( struct gl_context *ctx,
                              struct gl_program *prog )
 {
+   struct brw_context *brw = brw_context(ctx);
+
+   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
+    *
+    * In brw_upload_pipeline_state(), we compare brw->foo_program to
+    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
+    * pointer has changed.
+    *
+    * We cannot leave brw->foo_program as a dangling pointer to the dead
+    * program.  malloc() may allocate the same memory for a new gl_program,
+    * causing us to see matching pointers...but totally different programs.
+    *
+    * We cannot set brw->foo_program to NULL, either.  If we've deleted the
+    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
+    * would cause us to see matching pointers (NULL == NULL), and fail to
+    * detect that a program has changed since our last draw.
+    *
+    * So, set it to a bogus gl_program pointer that will never match,
+    * causing us to properly reevaluate the state on our next draw.
+    *
+    * Getting this wrong causes heisenbugs which are very hard to catch,
+    * as you need a very specific allocation pattern to hit the problem.
+    */
+   static const struct gl_program deleted_program;
+
+   if (brw->vertex_program == prog)
+      brw->vertex_program = &deleted_program;
+
+   if (brw->tess_ctrl_program == prog)
+      brw->tess_ctrl_program = &deleted_program;
+
+   if (brw->tess_eval_program == prog)
+      brw->tess_eval_program = &deleted_program;
+
+   if (brw->geometry_program == prog)
+      brw->geometry_program = &deleted_program;
+
+   if (brw->fragment_program == prog)
+      brw->fragment_program = &deleted_program;
+
+   if (brw->compute_program == prog)
+      brw->compute_program = &deleted_program;
+
    _mesa_delete_program( ctx, prog );
 }
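
The comment above is the heart of this hunk: a freed program pointer must compare unequal both to any future allocation and to NULL. The same sentinel-pointer pattern in isolation, with hypothetical names (a sketch, not i965 code):

    #include <stdbool.h>

    struct widget { int id; };

    static const struct widget deleted_widget;   /* never dereferenced, only compared */
    static const struct widget *last_seen;

    void widget_deleted(const struct widget *w)
    {
       if (last_seen == w)
          last_seen = &deleted_widget;   /* guarantees a mismatch on the next check */
    }

    bool widget_changed(const struct widget *current)
    {
       /* If last_seen were reset to NULL instead, NULL == NULL could hide a change. */
       bool changed = (last_seen != current);
       last_seen = current;
       return changed;
    }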
 
@@ -191,7 +227,7 @@ brwProgramStringNotify(struct gl_context *ctx,
                       GLenum target,
                       struct gl_program *prog)
 {
-   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->IsPositionInvariant);
+   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
 
    struct brw_context *brw = brw_context(ctx);
    const struct brw_compiler *compiler = brw->screen->compiler;
@@ -206,11 +242,9 @@ brwProgramStringNotify(struct gl_context *ctx,
         brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
       newFP->id = get_new_program_id(brw->screen);
 
-      brw_add_texrect_params(prog);
-
       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
 
-      brw_fs_precompile(ctx, NULL, prog);
+      brw_fs_precompile(ctx, prog);
       break;
    }
    case GL_VERTEX_PROGRAM_ARB: {
@@ -220,7 +254,7 @@ brwProgramStringNotify(struct gl_context *ctx,
 
       if (newVP == curVP)
         brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
-      if (newVP->program.IsPositionInvariant) {
+      if (newVP->program.arb.IsPositionInvariant) {
         _mesa_insert_mvp_code(ctx, &newVP->program);
       }
       newVP->id = get_new_program_id(brw->screen);
@@ -229,12 +263,10 @@ brwProgramStringNotify(struct gl_context *ctx,
        */
       _tnl_program_string(ctx, target, prog);
 
-      brw_add_texrect_params(prog);
-
       prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                  compiler->scalar_stage[MESA_SHADER_VERTEX]);
 
-      brw_vs_precompile(ctx, NULL, prog);
+      brw_vs_precompile(ctx, prog);
       break;
    }
    default:
@@ -307,38 +339,19 @@ brw_blend_barrier(struct gl_context *ctx)
    }
 }
 
-void
-brw_add_texrect_params(struct gl_program *prog)
-{
-   for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
-      if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
-         continue;
-
-      int tokens[STATE_LENGTH] = {
-         STATE_INTERNAL,
-         STATE_TEXRECT_SCALE,
-         texunit,
-         0,
-         0
-      };
-
-      _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
-   }
-}
-
 void
 brw_get_scratch_bo(struct brw_context *brw,
-                  drm_intel_bo **scratch_bo, int size)
+                  struct brw_bo **scratch_bo, int size)
 {
-   drm_intel_bo *old_bo = *scratch_bo;
+   struct brw_bo *old_bo = *scratch_bo;
 
    if (old_bo && old_bo->size < size) {
-      drm_intel_bo_unreference(old_bo);
+      brw_bo_unreference(old_bo);
       old_bo = NULL;
    }
 
    if (!old_bo) {
-      *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
+      *scratch_bo = brw_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
    }
 }
 
@@ -356,11 +369,11 @@ brw_alloc_stage_scratch(struct brw_context *brw,
       stage_state->per_thread_scratch = per_thread_size;
 
       if (stage_state->scratch_bo)
-         drm_intel_bo_unreference(stage_state->scratch_bo);
+         brw_bo_unreference(stage_state->scratch_bo);
 
       stage_state->scratch_bo =
-         drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
-                            per_thread_size * thread_count, 4096);
+         brw_bo_alloc(brw->bufmgr, "shader scratch space",
+                      per_thread_size * thread_count, 4096);
    }
 }
 
@@ -372,7 +385,6 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
    functions->DeleteProgram = brwDeleteProgram;
    functions->ProgramStringNotify = brwProgramStringNotify;
 
-   functions->NewShader = brw_new_shader;
    functions->LinkShader = brw_link_shader;
 
    functions->MemoryBarrier = brw_memory_barrier;
@@ -390,8 +402,8 @@ brw_init_shader_time(struct brw_context *brw)
 {
    const int max_entries = 2048;
    brw->shader_time.bo =
-      drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
+      brw_bo_alloc(brw->bufmgr, "shader time",
+                   max_entries * BRW_SHADER_TIME_STRIDE * 3, 4096);
    brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
    brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
    brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
@@ -566,21 +578,21 @@ brw_collect_shader_time(struct brw_context *brw)
     * delaying reading the reports, but it doesn't look like it's a big
     * overhead compared to the cost of tracking the time in the first place.
     */
-   drm_intel_bo_map(brw->shader_time.bo, true);
+   brw_bo_map(brw, brw->shader_time.bo, true);
    void *bo_map = brw->shader_time.bo->virtual;
 
    for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
 
-      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
-      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
-      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
+      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
    }
 
    /* Zero the BO out to clear it out for our next collection.
     */
    memset(bo_map, 0, brw->shader_time.bo->size);
-   drm_intel_bo_unmap(brw->shader_time.bo);
+   brw_bo_unmap(brw->shader_time.bo);
 }
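
The loop above treats each shader_time entry as three counters spaced BRW_SHADER_TIME_STRIDE bytes apart, with the 32-bit value of interest at the start of each slot. A layout sketch under that assumption (the 64-byte stride and the helper name are illustrative, not taken from this patch):

    #include <stdint.h>

    /* Assumed stride; BRW_SHADER_TIME_STRIDE is defined in the i965 headers. */
    #define SKETCH_TIME_STRIDE 64

    struct sketch_time_entry {
       uint32_t time, written, reset;   /* decoded counters for one entry */
    };

    static struct sketch_time_entry
    sketch_read_entry(const uint32_t *bo_map, int i)
    {
       /* Entry i starts 3 strides in; each counter sits one stride apart. */
       const uint32_t *times = bo_map + i * 3 * SKETCH_TIME_STRIDE / 4;
       return (struct sketch_time_entry) {
          .time    = times[SKETCH_TIME_STRIDE * 0 / 4],
          .written = times[SKETCH_TIME_STRIDE * 1 / 4],
          .reset   = times[SKETCH_TIME_STRIDE * 2 / 4],
       };
    }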
 
 void
@@ -603,29 +615,25 @@ brw_collect_and_report_shader_time(struct brw_context *brw)
  * change their lifetimes compared to normal operation.
  */
 int
-brw_get_shader_time_index(struct brw_context *brw,
-                          struct gl_shader_program *shader_prog,
-                          struct gl_program *prog,
-                          enum shader_time_shader_type type)
+brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
+                          enum shader_time_shader_type type, bool is_glsl_sh)
 {
    int shader_time_index = brw->shader_time.num_entries++;
    assert(shader_time_index < brw->shader_time.max_entries);
    brw->shader_time.types[shader_time_index] = type;
 
-   int id = shader_prog ? shader_prog->Name : prog->Id;
    const char *name;
-   if (id == 0) {
+   if (prog->Id == 0) {
       name = "ff";
-   } else if (!shader_prog) {
-      name = "prog";
-   } else if (shader_prog->Label) {
-      name = ralloc_strdup(brw->shader_time.names, shader_prog->Label);
+   } else if (is_glsl_sh) {
+      name = prog->info.label ?
+         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
    } else {
-      name = "glsl";
+      name = "prog";
    }
 
    brw->shader_time.names[shader_time_index] = name;
-   brw->shader_time.ids[shader_time_index] = id;
+   brw->shader_time.ids[shader_time_index] = prog->Id;
 
    return shader_time_index;
 }
@@ -633,7 +641,7 @@ brw_get_shader_time_index(struct brw_context *brw,
 void
 brw_destroy_shader_time(struct brw_context *brw)
 {
-   drm_intel_bo_unreference(brw->shader_time.bo);
+   brw_bo_unreference(brw->shader_time.bo);
    brw->shader_time.bo = NULL;
 }
 
@@ -648,22 +656,11 @@ brw_stage_prog_data_free(const void *p)
 }
 
 void
-brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
-            struct gl_linked_shader *shader, struct gl_program *prog)
+brw_dump_arb_asm(const char *stage, struct gl_program *prog)
 {
-   if (shader_prog) {
-      if (shader->ir) {
-         fprintf(stderr,
-                 "GLSL IR for native %s shader %d:\n",
-                 stage, shader_prog->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
-         fprintf(stderr, "\n\n");
-      }
-   } else {
-      fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
-              stage, prog->Id, stage);
-      _mesa_print_program(prog);
-   }
+   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
+           stage, prog->Id, stage);
+   _mesa_print_program(prog);
 }
 
 void
@@ -684,3 +681,90 @@ brw_setup_tex_for_precompile(struct brw_context *brw,
       }
    }
 }
+
+/**
+ * Sets up the starting offsets for the groups of binding table entries
+ * common to all pipeline stages.
+ *
+ * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
+ * unused, and so that adding a small offset to one of them still trips our
+ * asserts that surface indices are < BRW_MAX_SURFACES.
+ */
+uint32_t
+brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
+                                        const struct gl_program *prog,
+                                        struct brw_stage_prog_data *stage_prog_data,
+                                        uint32_t next_binding_table_offset)
+{
+   int num_textures = util_last_bit(prog->SamplersUsed);
+
+   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   if (prog->info.num_ubos) {
+      assert(prog->info.num_ubos <= BRW_MAX_UBO);
+      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_ubos;
+   } else {
+      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.num_ssbos) {
+      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
+      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_ssbos;
+   } else {
+      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
+      next_binding_table_offset++;
+   } else {
+      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
+   }
+
+   if (prog->nir->info.uses_texture_gather) {
+      if (devinfo->gen >= 8) {
+         stage_prog_data->binding_table.gather_texture_start =
+            stage_prog_data->binding_table.texture_start;
+      } else {
+         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
+         next_binding_table_offset += num_textures;
+      }
+   } else {
+      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.num_abos) {
+      stage_prog_data->binding_table.abo_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_abos;
+   } else {
+      stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.num_images) {
+      stage_prog_data->binding_table.image_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_images;
+   } else {
+      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
+   }
+
+   /* This may or may not be used depending on how the compile goes. */
+   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
+   next_binding_table_offset++;
+
+   /* Plane 0 is just the regular texture section */
+   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
+
+   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
+
+   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
+   return next_binding_table_offset;
+}
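
A minimal sketch of how a stage might consume this shared helper; the devinfo access path and the zero starting offset are assumptions for illustration, not part of the patch:

    /* Hypothetical caller: a stage with no stage-specific surfaces could hand
     * its whole binding table to the shared helper and record the first free
     * slot it returns (guaranteed <= BRW_MAX_SURFACES by the assert above). */
    static uint32_t
    example_assign_binding_table(struct brw_context *brw,
                                 const struct gl_program *prog,
                                 struct brw_stage_prog_data *prog_data)
    {
       /* Assumes the screen embeds a gen_device_info, as i965 does. */
       const struct gen_device_info *devinfo = &brw->screen->devinfo;

       /* A real stage would first reserve its own surfaces (render targets,
        * transform feedback, etc.) and pass the next free slot instead of 0. */
       return brw_assign_common_binding_table_offsets(devinfo, prog, prog_data, 0);
    }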