android,autotools,i965: Fix location of float64_glsl.h

[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c

index 647f138f0a39245ba2fd0540243cb9d7abfa002b..1038d9a47a0a7d2de0ed3c43dd3d7311f270c358 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -31,6 +31,7 @@
  
  #include <pthread.h>
  #include "main/imports.h"
+#include "main/glspirv.h"
  #include "program/prog_parameter.h"
  #include "program/prog_print.h"
  #include "program/prog_to_nir.h"
@@ -39,139 +40,240 @@
  #include "tnl/tnl.h"
  #include "util/ralloc.h"
  #include "compiler/glsl/ir.h"
+#include "compiler/glsl/program.h"
  #include "compiler/glsl/glsl_to_nir.h"
+#include "glsl/float64_glsl.h"
  
  #include "brw_program.h"
  #include "brw_context.h"
-#include "brw_shader.h"
-#include "brw_nir.h"
+#include "compiler/brw_nir.h"
+#include "brw_defines.h"
  #include "intel_batchbuffer.h"
  
-static void
+#include "brw_cs.h"
+#include "brw_gs.h"
+#include "brw_vs.h"
+#include "brw_wm.h"
+
+#include "main/shaderapi.h"
+#include "main/shaderobj.h"
+
+static bool
  brw_nir_lower_uniforms(nir_shader *nir, bool is_scalar)
  {
     if (is_scalar) {
        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                                 type_size_scalar_bytes);
-      nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
+      return nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0);
     } else {
        nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms,
                                 type_size_vec4_bytes);
-      nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
+      return nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0);
+   }
+}
+
+static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+                                        GLuint id, bool is_arb_asm);
+
+static nir_shader *
+compile_fp64_funcs(struct gl_context *ctx,
+                   const nir_shader_compiler_options *options,
+                   void *mem_ctx,
+                   gl_shader_stage stage)
+{
+   const GLuint name = ~0;
+   struct gl_shader *sh;
+
+   sh = _mesa_new_shader(name, stage);
+
+   sh->Source = float64_source;
+   sh->CompileStatus = COMPILE_FAILURE;
+   _mesa_glsl_compile_shader(ctx, sh, false, false, true);
+
+   if (!sh->CompileStatus) {
+      if (sh->InfoLog) {
+         _mesa_problem(ctx,
+                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
+                       sh->InfoLog, float64_source);
+      }
     }
+
+   struct gl_shader_program *sh_prog;
+   sh_prog = _mesa_new_shader_program(name);
+   sh_prog->Label = NULL;
+   sh_prog->NumShaders = 1;
+   sh_prog->Shaders = malloc(sizeof(struct gl_shader *));
+   sh_prog->Shaders[0] = sh;
+
+   struct gl_linked_shader *linked = rzalloc(NULL, struct gl_linked_shader);
+   linked->Stage = stage;
+   linked->Program =
+      brwNewProgram(ctx,
+                    _mesa_shader_stage_to_program(stage),
+                    name, false);
+
+   linked->ir = sh->ir;
+   sh_prog->_LinkedShaders[stage] = linked;
+
+   nir_shader *nir = glsl_to_nir(sh_prog, stage, options);
+
+   return nir_shader_clone(mem_ctx, nir);
  }
  
  nir_shader *
  brw_create_nir(struct brw_context *brw,
                 const struct gl_shader_program *shader_prog,
-               const struct gl_program *prog,
+               struct gl_program *prog,
                 gl_shader_stage stage,
                 bool is_scalar)
  {
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
     struct gl_context *ctx = &brw->ctx;
     const nir_shader_compiler_options *options =
        ctx->Const.ShaderCompilerOptions[stage].NirOptions;
-   bool progress;
     nir_shader *nir;
  
-   /* First, lower the GLSL IR or Mesa IR to NIR */
+   /* First, lower the GLSL/Mesa IR or SPIR-V to NIR */
     if (shader_prog) {
-      nir = glsl_to_nir(shader_prog, stage, options);
+      if (shader_prog->data->spirv) {
+         nir = _mesa_spirv_to_nir(ctx, shader_prog, stage, options);
+      } else {
+         nir = glsl_to_nir(shader_prog, stage, options);
+      }
+      assert (nir);
+
        nir_remove_dead_variables(nir, nir_var_shader_in | nir_var_shader_out);
+      nir_lower_returns(nir);
+      nir_validate_shader(nir, "after glsl_to_nir or spirv_to_nir and "
+                               "return lowering");
        NIR_PASS_V(nir, nir_lower_io_to_temporaries,
                   nir_shader_get_entrypoint(nir), true, false);
     } else {
        nir = prog_to_nir(prog, options);
-      NIR_PASS_V(nir, nir_convert_to_ssa); /* turn registers into SSA */
+      NIR_PASS_V(nir, nir_lower_regs_to_ssa); /* turn registers into SSA */
     }
-   nir_validate_shader(nir);
+   nir_validate_shader(nir, "before brw_preprocess_nir");
+
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
  
-   (void)progress;
+   if (!devinfo->has_64bit_types && nir->info.uses_64bit) {
+      nir_shader *fp64 = compile_fp64_funcs(ctx, options, ralloc_parent(nir), stage);
+
+      nir_validate_shader(fp64, "fp64");
+      exec_list_append(&nir->functions, &fp64->functions);
+   }
  
     nir = brw_preprocess_nir(brw->screen->compiler, nir);
  
+   NIR_PASS_V(nir, brw_nir_lower_image_load_store, devinfo);
+
+   if (stage == MESA_SHADER_TESS_CTRL) {
+      /* Lower gl_PatchVerticesIn from a sys. value to a uniform on Gen8+. */
+      static const gl_state_index16 tokens[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_TCS_PATCH_VERTICES_IN };
+      nir_lower_patch_vertices(nir, 0, devinfo->gen >= 8 ? tokens : NULL);
+   }
+
+   if (stage == MESA_SHADER_TESS_EVAL) {
+      /* Lower gl_PatchVerticesIn to a constant if we have a TCS, or
+       * a uniform if we don't.
+       */
+      struct gl_linked_shader *tcs =
+         shader_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+      uint32_t static_patch_vertices =
+         tcs ? tcs->Program->nir->info.tess.tcs_vertices_out : 0;
+      static const gl_state_index16 tokens[STATE_LENGTH] =
+         { STATE_INTERNAL, STATE_TES_PATCH_VERTICES_IN };
+      nir_lower_patch_vertices(nir, static_patch_vertices, tokens);
+   }
+
     if (stage == MESA_SHADER_FRAGMENT) {
        static const struct nir_lower_wpos_ytransform_options wpos_options = {
           .state_tokens = {STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0},
           .fs_coord_pixel_center_integer = 1,
           .fs_coord_origin_upper_left = 1,
        };
-      _mesa_add_state_reference(prog->Parameters,
-                                (gl_state_index *) wpos_options.state_tokens);
  
+      bool progress = false;
        NIR_PASS(progress, nir, nir_lower_wpos_ytransform, &wpos_options);
+      if (progress) {
+         _mesa_add_state_reference(prog->Parameters,
+                                   wpos_options.state_tokens);
+      }
     }
  
-   NIR_PASS(progress, nir, nir_lower_system_values);
     NIR_PASS_V(nir, brw_nir_lower_uniforms, is_scalar);
  
-   if (shader_prog) {
-      NIR_PASS_V(nir, nir_lower_samplers, shader_prog);
-      NIR_PASS_V(nir, nir_lower_atomics, shader_prog);
-   }
-
     return nir;
  }
  
+void
+brw_shader_gather_info(nir_shader *nir, struct gl_program *prog)
+{
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Copy the info we just generated back into the gl_program */
+   const char *prog_name = prog->info.name;
+   const char *prog_label = prog->info.label;
+   prog->info = nir->info;
+   prog->info.name = prog_name;
+   prog->info.label = prog_label;
+}
+
  static unsigned
  get_new_program_id(struct intel_screen *screen)
  {
-   static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
-   pthread_mutex_lock(&m);
-   unsigned id = screen->program_id++;
-   pthread_mutex_unlock(&m);
-   return id;
+   return p_atomic_inc_return(&screen->program_id);
  }
  
-static struct gl_program *brwNewProgram( struct gl_context *ctx,
-                                     GLenum target,
-                                     GLuint id )
+static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
+                                        GLuint id, bool is_arb_asm)
  {
     struct brw_context *brw = brw_context(ctx);
+   struct brw_program *prog = rzalloc(NULL, struct brw_program);
  
-   switch (target) {
-   case GL_VERTEX_PROGRAM_ARB:
-   case GL_TESS_CONTROL_PROGRAM_NV:
-   case GL_TESS_EVALUATION_PROGRAM_NV:
-   case GL_GEOMETRY_PROGRAM_NV:
-   case GL_COMPUTE_PROGRAM_NV: {
-      struct brw_program *prog = rzalloc(NULL, struct brw_program);
-      if (prog) {
-        prog->id = get_new_program_id(brw->screen);
-
-        return _mesa_init_gl_program(&prog->program, target, id);
-      }
-      else
-        return NULL;
-   }
-
-   case GL_FRAGMENT_PROGRAM_ARB: {
-      struct brw_program *prog;
-      if (brw->gen < 6) {
-         struct gen4_fragment_program *g4_prog =
-            rzalloc(NULL, struct gen4_fragment_program);
-         prog = &g4_prog->base;
-      } else {
-         prog = CALLOC_STRUCT(brw_program);
-      }
-
-      if (prog) {
-        prog->id = get_new_program_id(brw->screen);
+   if (prog) {
+      prog->id = get_new_program_id(brw->screen);
  
-        return _mesa_init_gl_program(&prog->program, target, id);
-      }
-      else
-        return NULL;
+      return _mesa_init_gl_program(&prog->program, target, id, is_arb_asm);
     }
  
-   default:
-      unreachable("Unsupported target in brwNewProgram()");
-   }
+   return NULL;
  }
  
  static void brwDeleteProgram( struct gl_context *ctx,
                               struct gl_program *prog )
  {
+   struct brw_context *brw = brw_context(ctx);
+
+   /* Beware!  prog's refcount has reached zero, and it's about to be freed.
+    *
+    * In brw_upload_pipeline_state(), we compare brw->programs[i] to
+    * ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
+    * pointer has changed.
+    *
+    * We cannot leave brw->programs[i] as a dangling pointer to the dead
+    * program.  malloc() may allocate the same memory for a new gl_program,
+    * causing us to see matching pointers...but totally different programs.
+    *
+    * We cannot set brw->programs[i] to NULL, either.  If we've deleted the
+    * active program, Mesa may set ctx->FooProgram._Current to NULL.  That
+    * would cause us to see matching pointers (NULL == NULL), and fail to
+    * detect that a program has changed since our last draw.
+    *
+    * So, set it to a bogus gl_program pointer that will never match,
+    * causing us to properly reevaluate the state on our next draw.
+    *
+    * Getting this wrong causes heisenbugs which are very hard to catch,
+    * as you need a very specific allocation pattern to hit the problem.
+    */
+   static const struct gl_program deleted_program;
+
+   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (brw->programs[i] == prog)
+         brw->programs[i] = (struct gl_program *) &deleted_program;
+   }
+
     _mesa_delete_program( ctx, prog );
  }
  
@@ -181,7 +283,7 @@ brwProgramStringNotify(struct gl_context *ctx,
                        GLenum target,
                        struct gl_program *prog)
  {
-   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->IsPositionInvariant);
+   assert(target == GL_VERTEX_PROGRAM_ARB || !prog->arb.IsPositionInvariant);
  
     struct brw_context *brw = brw_context(ctx);
     const struct brw_compiler *compiler = brw->screen->compiler;
@@ -190,27 +292,27 @@ brwProgramStringNotify(struct gl_context *ctx,
     case GL_FRAGMENT_PROGRAM_ARB: {
        struct brw_program *newFP = brw_program(prog);
        const struct brw_program *curFP =
-         brw_program_const(brw->fragment_program);
+         brw_program_const(brw->programs[MESA_SHADER_FRAGMENT]);
  
        if (newFP == curFP)
          brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM;
        newFP->id = get_new_program_id(brw->screen);
  
-      brw_add_texrect_params(prog);
-
        prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
  
-      brw_fs_precompile(ctx, NULL, prog);
+      brw_shader_gather_info(prog->nir, prog);
+
+      brw_fs_precompile(ctx, prog);
        break;
     }
     case GL_VERTEX_PROGRAM_ARB: {
        struct brw_program *newVP = brw_program(prog);
        const struct brw_program *curVP =
-         brw_program_const(brw->vertex_program);
+         brw_program_const(brw->programs[MESA_SHADER_VERTEX]);
  
        if (newVP == curVP)
          brw->ctx.NewDriverState |= BRW_NEW_VERTEX_PROGRAM;
-      if (newVP->program.IsPositionInvariant) {
+      if (newVP->program.arb.IsPositionInvariant) {
          _mesa_insert_mvp_code(ctx, &newVP->program);
        }
        newVP->id = get_new_program_id(brw->screen);
@@ -219,12 +321,12 @@ brwProgramStringNotify(struct gl_context *ctx,
         */
        _tnl_program_string(ctx, target, prog);
  
-      brw_add_texrect_params(prog);
-
        prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
                                   compiler->scalar_stage[MESA_SHADER_VERTEX]);
  
-      brw_vs_precompile(ctx, NULL, prog);
+      brw_shader_gather_info(prog->nir, prog);
+
+      brw_vs_precompile(ctx, prog);
        break;
     }
     default:
@@ -245,10 +347,9 @@ static void
  brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
  {
     struct brw_context *brw = brw_context(ctx);
-   unsigned bits = (PIPE_CONTROL_DATA_CACHE_FLUSH |
-                    PIPE_CONTROL_NO_WRITE |
-                    PIPE_CONTROL_CS_STALL);
-   assert(brw->gen >= 7 && brw->gen <= 9);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   unsigned bits = PIPE_CONTROL_DATA_CACHE_FLUSH | PIPE_CONTROL_CS_STALL;
+   assert(devinfo->gen >= 7 && devinfo->gen <= 11);
  
     if (barriers & (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
                     GL_ELEMENT_ARRAY_BARRIER_BIT |
@@ -262,29 +363,32 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers)
     if (barriers & GL_TEXTURE_FETCH_BARRIER_BIT)
        bits |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  
-   if (barriers & GL_TEXTURE_UPDATE_BARRIER_BIT)
-      bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
+   if (barriers & (GL_TEXTURE_UPDATE_BARRIER_BIT |
+                   GL_PIXEL_BUFFER_BARRIER_BIT))
+      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
+               PIPE_CONTROL_RENDER_TARGET_FLUSH);
  
     if (barriers & GL_FRAMEBUFFER_BARRIER_BIT)
-      bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+      bits |= (PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                 PIPE_CONTROL_RENDER_TARGET_FLUSH);
  
     /* Typed surface messages are handled by the render cache on IVB, so we
      * need to flush it too.
      */
-   if (brw->gen == 7 && !brw->is_haswell)
+   if (devinfo->gen == 7 && !devinfo->is_haswell)
        bits |= PIPE_CONTROL_RENDER_TARGET_FLUSH;
  
     brw_emit_pipe_control_flush(brw, bits);
  }
  
  static void
-brw_blend_barrier(struct gl_context *ctx)
+brw_framebuffer_fetch_barrier(struct gl_context *ctx)
  {
     struct brw_context *brw = brw_context(ctx);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
  
-   if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
-      if (brw->gen >= 6) {
+   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
+      if (devinfo->gen >= 6) {
           brw_emit_pipe_control_flush(brw,
                                       PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                       PIPE_CONTROL_CS_STALL);
@@ -297,38 +401,20 @@ brw_blend_barrier(struct gl_context *ctx)
     }
  }
  
-void
-brw_add_texrect_params(struct gl_program *prog)
-{
-   for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
-      if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
-         continue;
-
-      int tokens[STATE_LENGTH] = {
-         STATE_INTERNAL,
-         STATE_TEXRECT_SCALE,
-         texunit,
-         0,
-         0
-      };
-
-      _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
-   }
-}
-
  void
  brw_get_scratch_bo(struct brw_context *brw,
-                  drm_intel_bo **scratch_bo, int size)
+                  struct brw_bo **scratch_bo, int size)
  {
-   drm_intel_bo *old_bo = *scratch_bo;
+   struct brw_bo *old_bo = *scratch_bo;
  
     if (old_bo && old_bo->size < size) {
-      drm_intel_bo_unreference(old_bo);
+      brw_bo_unreference(old_bo);
        old_bo = NULL;
     }
  
     if (!old_bo) {
-      *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
+      *scratch_bo =
+         brw_bo_alloc(brw->bufmgr, "scratch bo", size, BRW_MEMZONE_SCRATCH);
     }
  }
  
@@ -339,19 +425,91 @@ brw_get_scratch_bo(struct brw_context *brw,
  void
  brw_alloc_stage_scratch(struct brw_context *brw,
                          struct brw_stage_state *stage_state,
-                        unsigned per_thread_size,
-                        unsigned thread_count)
+                        unsigned per_thread_size)
  {
-   if (stage_state->per_thread_scratch < per_thread_size) {
-      stage_state->per_thread_scratch = per_thread_size;
+   if (stage_state->per_thread_scratch >= per_thread_size)
+      return;
+
+   stage_state->per_thread_scratch = per_thread_size;
+
+   if (stage_state->scratch_bo)
+      brw_bo_unreference(stage_state->scratch_bo);
  
-      if (stage_state->scratch_bo)
-         drm_intel_bo_unreference(stage_state->scratch_bo);
+   const struct gen_device_info *devinfo = &brw->screen->devinfo;
+   unsigned thread_count;
+   switch(stage_state->stage) {
+   case MESA_SHADER_VERTEX:
+      thread_count = devinfo->max_vs_threads;
+      break;
+   case MESA_SHADER_TESS_CTRL:
+      thread_count = devinfo->max_tcs_threads;
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      thread_count = devinfo->max_tes_threads;
+      break;
+   case MESA_SHADER_GEOMETRY:
+      thread_count = devinfo->max_gs_threads;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      thread_count = devinfo->max_wm_threads;
+      break;
+   case MESA_SHADER_COMPUTE: {
+      unsigned subslices = MAX2(brw->screen->subslice_total, 1);
+
+      /* The documentation for 3DSTATE_PS "Scratch Space Base Pointer" says:
+       *
+       * "Scratch Space per slice is computed based on 4 sub-slices.  SW must
+       *  allocate scratch space enough so that each slice has 4 slices
+       *  allowed."
+       *
+       * According to the other driver team, this applies to compute shaders
+       * as well.  This is not currently documented at all.
+       *
+       * brw->screen->subslice_total is the TOTAL number of subslices,
+       * but here we want to treat every slice as having 4 subslices
+       * instead of the actual number of subslices per slice.
+       */
+      if (devinfo->gen >= 9 && devinfo->gen < 11)
+         subslices = 4 * brw->screen->devinfo.num_slices;
+
+      unsigned scratch_ids_per_subslice;
+      if (devinfo->is_haswell) {
+         /* WaCSScratchSize:hsw
+          *
+          * Haswell's scratch space address calculation appears to be sparse
+          * rather than tightly packed. The Thread ID has bits indicating
+          * which subslice, EU within a subslice, and thread within an EU it
+          * is. There's a maximum of two slices and two subslices, so these
+          * can be stored with a single bit. Even though there are only 10 EUs
+          * per subslice, this is stored in 4 bits, so there's an effective
+          * maximum value of 16 EUs. Similarly, although there are only 7
+          * threads per EU, this is stored in a 3 bit number, giving an
+          * effective maximum value of 8 threads per EU.
+          *
+          * This means that we need to use 16 * 8 instead of 10 * 7 for the
+          * number of threads per subslice.
+          */
+         scratch_ids_per_subslice = 16 * 8;
+      } else if (devinfo->is_cherryview) {
+         /* Cherryview devices have either 6 or 8 EUs per subslice, and each
+          * EU has 7 threads. The 6 EU devices appear to calculate thread IDs
+          * as if they had 8 EUs.
+          */
+         scratch_ids_per_subslice = 8 * 7;
+      } else {
+         scratch_ids_per_subslice = devinfo->max_cs_threads;
+      }
  
-      stage_state->scratch_bo =
-         drm_intel_bo_alloc(brw->bufmgr, "shader scratch space",
-                            per_thread_size * thread_count, 4096);
+      thread_count = scratch_ids_per_subslice * subslices;
+      break;
     }
+   default:
+      unreachable("Unsupported stage!");
+   }
+
+   stage_state->scratch_bo =
+      brw_bo_alloc(brw->bufmgr, "shader scratch space",
+                   per_thread_size * thread_count, BRW_MEMZONE_SCRATCH);
  }
  
  void brwInitFragProgFuncs( struct dd_function_table *functions )
@@ -362,11 +520,10 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
     functions->DeleteProgram = brwDeleteProgram;
     functions->ProgramStringNotify = brwProgramStringNotify;
  
-   functions->NewShader = brw_new_shader;
     functions->LinkShader = brw_link_shader;
  
     functions->MemoryBarrier = brw_memory_barrier;
-   functions->BlendBarrier = brw_blend_barrier;
+   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
  }
  
  struct shader_times {
@@ -380,8 +537,9 @@ brw_init_shader_time(struct brw_context *brw)
  {
     const int max_entries = 2048;
     brw->shader_time.bo =
-      drm_intel_bo_alloc(brw->bufmgr, "shader time",
-                         max_entries * SHADER_TIME_STRIDE * 3, 4096);
+      brw_bo_alloc(brw->bufmgr, "shader time",
+                   max_entries * BRW_SHADER_TIME_STRIDE * 3,
+                   BRW_MEMZONE_OTHER);
     brw->shader_time.names = rzalloc_array(brw, const char *, max_entries);
     brw->shader_time.ids = rzalloc_array(brw, int, max_entries);
     brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
@@ -447,6 +605,7 @@ brw_report_shader_time(struct brw_context *brw)
        case ST_GS:
        case ST_FS8:
        case ST_FS16:
+      case ST_FS32:
        case ST_CS:
           written = brw->shader_time.cumulative[i].written;
           reset = brw->shader_time.cumulative[i].reset;
@@ -475,6 +634,7 @@ brw_report_shader_time(struct brw_context *brw)
        case ST_GS:
        case ST_FS8:
        case ST_FS16:
+      case ST_FS32:
        case ST_CS:
           total_by_type[type] += scaled[i];
           break;
@@ -524,6 +684,9 @@ brw_report_shader_time(struct brw_context *brw)
        case ST_FS16:
           stage = "fs16";
           break;
+      case ST_FS32:
+         stage = "fs32";
+         break;
        case ST_CS:
           stage = "cs";
           break;
@@ -543,6 +706,7 @@ brw_report_shader_time(struct brw_context *brw)
     print_shader_time_line("total", "gs", 0, total_by_type[ST_GS], total);
     print_shader_time_line("total", "fs8", 0, total_by_type[ST_FS8], total);
     print_shader_time_line("total", "fs16", 0, total_by_type[ST_FS16], total);
+   print_shader_time_line("total", "fs32", 0, total_by_type[ST_FS32], total);
     print_shader_time_line("total", "cs", 0, total_by_type[ST_CS], total);
  }
  
@@ -556,21 +720,20 @@ brw_collect_shader_time(struct brw_context *brw)
      * delaying reading the reports, but it doesn't look like it's a big
      * overhead compared to the cost of tracking the time in the first place.
      */
-   drm_intel_bo_map(brw->shader_time.bo, true);
-   void *bo_map = brw->shader_time.bo->virtual;
+   void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE);
  
     for (int i = 0; i < brw->shader_time.num_entries; i++) {
-      uint32_t *times = bo_map + i * 3 * SHADER_TIME_STRIDE;
+      uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE;
  
-      brw->shader_time.cumulative[i].time += times[SHADER_TIME_STRIDE * 0 / 4];
-      brw->shader_time.cumulative[i].written += times[SHADER_TIME_STRIDE * 1 / 4];
-      brw->shader_time.cumulative[i].reset += times[SHADER_TIME_STRIDE * 2 / 4];
+      brw->shader_time.cumulative[i].time += times[BRW_SHADER_TIME_STRIDE * 0 / 4];
+      brw->shader_time.cumulative[i].written += times[BRW_SHADER_TIME_STRIDE * 1 / 4];
+      brw->shader_time.cumulative[i].reset += times[BRW_SHADER_TIME_STRIDE * 2 / 4];
     }
  
     /* Zero the BO out to clear it out for our next collection.
      */
     memset(bo_map, 0, brw->shader_time.bo->size);
-   drm_intel_bo_unmap(brw->shader_time.bo);
+   brw_bo_unmap(brw->shader_time.bo);
  }
  
  void
@@ -593,29 +756,25 @@ brw_collect_and_report_shader_time(struct brw_context *brw)
   * change their lifetimes compared to normal operation.
   */
  int
-brw_get_shader_time_index(struct brw_context *brw,
-                          struct gl_shader_program *shader_prog,
-                          struct gl_program *prog,
-                          enum shader_time_shader_type type)
+brw_get_shader_time_index(struct brw_context *brw, struct gl_program *prog,
+                          enum shader_time_shader_type type, bool is_glsl_sh)
  {
     int shader_time_index = brw->shader_time.num_entries++;
     assert(shader_time_index < brw->shader_time.max_entries);
     brw->shader_time.types[shader_time_index] = type;
  
-   int id = shader_prog ? shader_prog->Name : prog->Id;
     const char *name;
-   if (id == 0) {
+   if (prog->Id == 0) {
        name = "ff";
-   } else if (!shader_prog) {
-      name = "prog";
-   } else if (shader_prog->Label) {
-      name = ralloc_strdup(brw->shader_time.names, shader_prog->Label);
+   } else if (is_glsl_sh) {
+      name = prog->info.label ?
+         ralloc_strdup(brw->shader_time.names, prog->info.label) : "glsl";
     } else {
-      name = "glsl";
+      name = "prog";
     }
  
     brw->shader_time.names[shader_time_index] = name;
-   brw->shader_time.ids[shader_time_index] = id;
+   brw->shader_time.ids[shader_time_index] = prog->Id;
  
     return shader_time_index;
  }
@@ -623,7 +782,7 @@ brw_get_shader_time_index(struct brw_context *brw,
  void
  brw_destroy_shader_time(struct brw_context *brw)
  {
-   drm_intel_bo_unreference(brw->shader_time.bo);
+   brw_bo_unreference(brw->shader_time.bo);
     brw->shader_time.bo = NULL;
  }
  
@@ -634,34 +793,22 @@ brw_stage_prog_data_free(const void *p)
  
     ralloc_free(prog_data->param);
     ralloc_free(prog_data->pull_param);
-   ralloc_free(prog_data->image_param);
  }
  
  void
-brw_dump_ir(const char *stage, struct gl_shader_program *shader_prog,
-            struct gl_linked_shader *shader, struct gl_program *prog)
+brw_dump_arb_asm(const char *stage, struct gl_program *prog)
  {
-   if (shader_prog) {
-      if (shader->ir) {
-         fprintf(stderr,
-                 "GLSL IR for native %s shader %d:\n",
-                 stage, shader_prog->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
-         fprintf(stderr, "\n\n");
-      }
-   } else {
-      fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
-              stage, prog->Id, stage);
-      _mesa_print_program(prog);
-   }
+   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
+           stage, prog->Id, stage);
+   _mesa_print_program(prog);
  }
  
  void
-brw_setup_tex_for_precompile(struct brw_context *brw,
+brw_setup_tex_for_precompile(const struct gen_device_info *devinfo,
                               struct brw_sampler_prog_key_data *tex,
                               struct gl_program *prog)
  {
-   const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
+   const bool has_shader_channel_select = devinfo->is_haswell || devinfo->gen >= 8;
     unsigned sampler_count = util_last_bit(prog->SamplersUsed);
     for (unsigned i = 0; i < sampler_count; i++) {
        if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
@@ -674,3 +821,133 @@ brw_setup_tex_for_precompile(struct brw_context *brw,
        }
     }
  }
+
+/**
+ * Sets up the starting offsets for the groups of binding table entries
+ * common to all pipeline stages.
+ *
+ * Unused groups are initialized to 0xd0d0d0d0 to make it obvious that they're
+ * unused but also make sure that addition of small offsets to them will
+ * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
+ */
+uint32_t
+brw_assign_common_binding_table_offsets(const struct gen_device_info *devinfo,
+                                        const struct gl_program *prog,
+                                        struct brw_stage_prog_data *stage_prog_data,
+                                        uint32_t next_binding_table_offset)
+{
+   int num_textures = util_last_bit(prog->SamplersUsed);
+
+   stage_prog_data->binding_table.texture_start = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   if (prog->info.num_ubos) {
+      assert(prog->info.num_ubos <= BRW_MAX_UBO);
+      stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_ubos;
+   } else {
+      stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.num_ssbos || prog->info.num_abos) {
+      assert(prog->info.num_abos <= BRW_MAX_ABO);
+      assert(prog->info.num_ssbos <= BRW_MAX_SSBO);
+      stage_prog_data->binding_table.ssbo_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_abos + prog->info.num_ssbos;
+   } else {
+      stage_prog_data->binding_table.ssbo_start = 0xd0d0d0d0;
+   }
+
+   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
+      stage_prog_data->binding_table.shader_time_start = next_binding_table_offset;
+      next_binding_table_offset++;
+   } else {
+      stage_prog_data->binding_table.shader_time_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.uses_texture_gather) {
+      if (devinfo->gen >= 8) {
+         stage_prog_data->binding_table.gather_texture_start =
+            stage_prog_data->binding_table.texture_start;
+      } else {
+         stage_prog_data->binding_table.gather_texture_start = next_binding_table_offset;
+         next_binding_table_offset += num_textures;
+      }
+   } else {
+      stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
+   }
+
+   if (prog->info.num_images) {
+      stage_prog_data->binding_table.image_start = next_binding_table_offset;
+      next_binding_table_offset += prog->info.num_images;
+   } else {
+      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
+   }
+
+   /* This may or may not be used depending on how the compile goes. */
+   stage_prog_data->binding_table.pull_constants_start = next_binding_table_offset;
+   next_binding_table_offset++;
+
+   /* Plane 0 is just the regular texture section */
+   stage_prog_data->binding_table.plane_start[0] = stage_prog_data->binding_table.texture_start;
+
+   stage_prog_data->binding_table.plane_start[1] = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
+   next_binding_table_offset += num_textures;
+
+   /* Set the binding table size.  Some callers may append new entries
+    * and increase this accordingly.
+    */
+   stage_prog_data->binding_table.size_bytes = next_binding_table_offset * 4;
+
+   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
+   return next_binding_table_offset;
+}
+
+void
+brw_prog_key_set_id(union brw_any_prog_key *key, gl_shader_stage stage,
+                    unsigned id)
+{
+   static const unsigned stage_offsets[] = {
+      offsetof(struct brw_vs_prog_key, program_string_id),
+      offsetof(struct brw_tcs_prog_key, program_string_id),
+      offsetof(struct brw_tes_prog_key, program_string_id),
+      offsetof(struct brw_gs_prog_key, program_string_id),
+      offsetof(struct brw_wm_prog_key, program_string_id),
+      offsetof(struct brw_cs_prog_key, program_string_id),
+   };
+   assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_offsets));
+   *(unsigned*)((uint8_t*)key + stage_offsets[stage]) = id;
+}
+
+void
+brw_populate_default_key(const struct gen_device_info *devinfo,
+                         union brw_any_prog_key *prog_key,
+                         struct gl_shader_program *sh_prog,
+                         struct gl_program *prog)
+{
+   switch (prog->info.stage) {
+   case MESA_SHADER_VERTEX:
+      brw_vs_populate_default_key(devinfo, &prog_key->vs, prog);
+      break;
+   case MESA_SHADER_TESS_CTRL:
+      brw_tcs_populate_default_key(devinfo, &prog_key->tcs, sh_prog, prog);
+      break;
+   case MESA_SHADER_TESS_EVAL:
+      brw_tes_populate_default_key(devinfo, &prog_key->tes, sh_prog, prog);
+      break;
+   case MESA_SHADER_GEOMETRY:
+      brw_gs_populate_default_key(devinfo, &prog_key->gs, prog);
+      break;
+   case MESA_SHADER_FRAGMENT:
+      brw_wm_populate_default_key(devinfo, &prog_key->wm, prog);
+      break;
+   case MESA_SHADER_COMPUTE:
+      brw_cs_populate_default_key(devinfo, &prog_key->cs, prog);
+      break;
+   default:
+      unreachable("Unsupported stage!");
+   }
+}